about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2022-11-26 20:05:29 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2022-11-26 20:05:29 +0100
commit a186b22d6e1b0ae92a45dba84d3f67e0248aff89 (patch)
tree 2f7322f9a8b4f49836c9808a8d8424efd7718bcb /src
parent a43186d137bb06f0bbdc4da4560dcee5023974eb (diff)
download box64-a186b22d6e1b0ae92a45dba84d3f67e0248aff89.tar.gz
box64-a186b22d6e1b0ae92a45dba84d3f67e0248aff89.zip
[DYNAREC] Refactored Strong Memory Model emulation
Diffstat (limited to 'src')
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_00.c 48
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_0f.c 43
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_64.c 16
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_66.c 20
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_660f.c 38
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_6664.c 4
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_66f0.c 24
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_67.c 9
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_f0.c 68
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_f20f.c 5
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_f30f.c 11
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_helper.c 5
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_helper.h 58
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_pass3.h 5
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_private.h 6
-rwxr-xr-x src/dynarec/dynarec_native_pass.c 1
-rwxr-xr-x src/main.c 2
17 files changed, 256 insertions, 107 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 3b3af4bf..360f4b9a 100755
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -530,6 +530,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(MODREG) {   // reg <= reg
                     SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3));
                 } else {                    // mem <= reg
+                    SMREAD();
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);
                     LDRSW_U12(gd, ed, fixedaddress);
                 }
@@ -537,6 +538,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 if(MODREG) {   // reg <= reg
                     MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3));
                 } else {                    // mem <= reg
+                    SMREAD();
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);
                     LDRw_U12(gd, ed, fixedaddress);
                 }
@@ -860,7 +862,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 BFIx(gb1, x1, gb2*8, 8);
                 BFIx(eb1, x4, eb2*8, 8);
             } else {
-                DMB_ISH();
+                SMDMB();
                 GETGB(x4);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0);
                 MARKLOCK;
@@ -869,7 +871,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 // do the swap 14 -> strb(ed), 1 -> gd
                 STLXRB(x3, x4, ed);
                 CBNZx_MARKLOCK(x3);
-                DMB_ISH();
+                SMDMB();
                 BFIx(gb1, x1, gb2*8, 8);
             }
             break;
@@ -885,7 +887,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 GETGD;
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0);
-                DMB_ISH();
+                SMDMB();
                 TSTx_mask(ed, 1, 0, 1+rex.w);    // mask=3 or 7
                 B_MARK(cNE);
                 MARKLOCK;
@@ -897,7 +899,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 LDRxw_U12(x1, ed, 0);
                 STRxw_U12(gd, ed, 0);
                 MARK2;
-                DMB_ISH();
+                SMDMB();
                 MOVxw_REG(gd, x1);
             }
             break;
@@ -931,10 +933,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, &lock, 0, 0);
                 STRB_U12(gd, ed, fixedaddress);
-                if(lock || (box64_dynarec_strongmem && 
-                 (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) {
-                    DMB_ISH();
-                }
+                SMWRITELOCK(lock);
             }
             break;
         case 0x89:
@@ -946,10 +945,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {                    // mem <= reg
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 0);
                 STRxw_U12(gd, ed, fixedaddress);
-                if(lock || (box64_dynarec_strongmem && 
-                 (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) {
-                    DMB_ISH();
-                }
+                SMWRITELOCK(lock);
             }
             break;
         case 0x8A:
@@ -980,10 +976,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 }
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, &lock, 0, 0);
-                if(lock || (box64_dynarec_strongmem && 
-                 (dyn->insts[ninst].x64.barrier || !ninst || box64_dynarec_strongmem>1 || (ninst && dyn->insts[ninst-1].x64.barrier)))) {
-                    DMB_ISH();
-                }
+                SMREADLOCK(lock);
                 LDRB_U12(x4, wback, fixedaddress);
                 ed = x4;
             }
@@ -997,10 +990,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 MOVxw_REG(gd, xRAX+(nextop&7)+(rex.b<<3));
             } else {
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 0);
-                if(lock || (box64_dynarec_strongmem && 
-                 (dyn->insts[ninst].x64.barrier || !ninst || box64_dynarec_strongmem>1 || (ninst && dyn->insts[ninst-1].x64.barrier)))) {
-                    DMB_ISH();
-                }
+                SMREADLOCK(lock);
                 LDRxw_U12(gd, ed, fixedaddress);
             }
             break;
@@ -1013,6 +1003,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                 LDRH_U12(x3, xEmu, offsetof(x64emu_t, segs[(nextop&0x38)>>3]));
                 STRH_U12(x3, ed, fixedaddress);
+                SMWRITE2();
             }
             break;
         case 0x8D:
@@ -1037,6 +1028,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if((nextop&0xC0)==0xC0) {
                 ed = xRAX+(nextop&7)+(rex.b<<3);
             } else {
+                SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<2, 0, rex, NULL, 0, 0);
                 LDRH_U12(x1, ed, fixedaddress);
                 ed = x1;
@@ -1144,12 +1136,14 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             u64 = F64;
             MOV64x(x1, u64);
             STRB_U12(xRAX, x1, 0);
+            SMWRITE();
             break;
         case 0xA3:
             INST_NAME("MOV Od,EAX");
             u64 = F64;
             MOV64x(x1, u64);
             STRxw_U12(xRAX, x1, 0);
+            SMWRITE();
             break;
         case 0xA4:
             if(rep) {
@@ -1523,7 +1517,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     GETED(1);
                     u8 = (F8)&(rex.w?0x3f:0x1f);
                     emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4);
-                    if(u8) { WBACK; }
                     break;
                 case 1:
                     INST_NAME("ROR Ed, Ib");
@@ -1531,7 +1524,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     GETED(1);
                     u8 = (F8)&(rex.w?0x3f:0x1f);
                     emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4);
-                    if(u8) { WBACK; }
                     break;
                 case 2:
                     INST_NAME("RCL Ed, Ib");
@@ -1630,10 +1622,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 u8 = F8;
                 MOV32w(x3, u8);
                 STRB_U12(x3, ed, fixedaddress);
-                if(lock || (box64_dynarec_strongmem && 
-                 (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) {
-                    DMB_ISH();
-                }
+                SMWRITELOCK(lock);
             }
             break;
         case 0xC7:
@@ -1648,10 +1637,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 i64 = F32S;
                 MOV64xw(x3, i64);
                 STRxw_U12(x3, ed, fixedaddress);
-                if(lock || (box64_dynarec_strongmem && 
-                 (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) {
-                    DMB_ISH();
-                }
+                SMWRITELOCK(lock);
             }
             break;
 
@@ -2513,7 +2499,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 4: // JMP Ed
                     INST_NAME("JMP Ed");
-                    BARRIER(BARRIER_FULL);
+                    BARRIER(BARRIER_FLOAT);
                     GETEDx(0);
                     jump_to_next(dyn, 0, ed, ninst);
                     *need_epilog = 0;
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 427177be..3e958790 100755
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -39,6 +39,7 @@
     if(MODREG) {                                                    \

         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w);  \

     } else {                                                        \

+        SMREAD();                                                   \

         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, D); \

         a = fpu_get_scratch(dyn);                                   \

         VLDR128_U12(a, ed, fixedaddress);                           \

@@ -52,6 +53,7 @@
     if(MODREG) {                                                \

         a = mmx_get_reg(dyn, ninst, x1, x2, x3, (nextop&7));    \

     } else {                                                    \

+        SMREAD();                                               \

         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, D); \

         a = fpu_get_scratch(dyn);                               \

         VLDR64_U12(a, ed, fixedaddress);                        \

@@ -60,6 +62,7 @@
 #define PUTEM(a)                            \

     if(!MODREG) {                           \

         VSTR64_U12(a, ed, fixedaddress);    \

+        SMWRITE2();                         \

     }

 

 uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)

@@ -108,6 +111,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 

         case 0x05:

             INST_NAME("SYSCALL");

+            SMEND();

             GETIP(addr);

             STORE_XEMU_CALL(xRIP);

             CALL_S(x64Syscall, -1);

@@ -172,6 +176,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VLDR128_U12(v0, ed, fixedaddress);   // no alignment issue with ARMv8 NEON :)

+                SMWRITE2();

             }

             break;

         case 0x11:

@@ -186,6 +191,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VSTR128_U12(v0, ed, fixedaddress);

+                SMWRITE2();

             }

             break;

         case 0x12:

@@ -198,6 +204,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 INST_NAME("MOVLPS Gx,Ex");

                 GETGX(v0, 1);

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);

                 VLD1_64(v0, 0, ed);

             }

@@ -212,11 +219,13 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);

                 VST1_64(v0, 0, ed);  // better to use VST1 than VSTR_64, to avoid NEON->VFPU transfert I assume

+                SMWRITE2();

             }

             break;

         case 0x14:

             INST_NAME("UNPCKLPS Gx, Ex");

             nextop = F8;

+            SMREAD();

             GETEX(q0, 0, 0);

             GETGX(v0, 1);

             VZIP1Q_32(v0, v0, q0);

@@ -224,6 +233,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0x15:

             INST_NAME("UNPCKHPS Gx, Ex");

             nextop = F8;

+            SMREAD();

             GETEX(q0, 0, 0);

             GETGX(v0, 1);

             VZIP2Q_32(v0, v0, q0);

@@ -237,6 +247,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VMOVeD(v0, 1, v1, 0);

             } else {

                 INST_NAME("MOVHPS Gx,Ex");

+                SMREAD();

                 GETGX(v0, 1);

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);

                 VLD1_64(v0, 1, ed);

@@ -252,6 +263,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);

                 VST1_64(v0, 1, ed);

+                SMWRITE2();

             }

             break;

         case 0x18:

@@ -303,6 +315,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 VMOVQ(v0, v1);

             } else {

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VLDR128_U12(v0, ed, fixedaddress);

             }

@@ -319,6 +332,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VSTR128_U12(v0, ed, fixedaddress);

+                SMWRITE2();

             }

             break;

 

@@ -348,6 +362,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0);

             } else {

                 s0 = fpu_get_scratch(dyn);

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);

                 VLDR32_U12(s0, ed, fixedaddress);

             }

@@ -407,6 +422,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     if(MODREG) {

                         REVxw(gd, xRAX+(nextop&7)+(rex.b<<3));

                     } else {

+                        SMREAD();

                         addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);

                         LDRxw_U12(gd, ed, fixedaddress);

                         REVxw(gd, gd);

@@ -419,6 +435,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     if(MODREG) {   // reg <= reg

                         REVxw(xRAX+(nextop&7)+(rex.b<<3), gd);

                     } else {                    // mem <= reg

+                        SMREAD();

                         addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);

                         REVxw(x1, gd);

                         STRxw_U12(x1, ed, fixedaddress);

@@ -491,6 +508,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 BFIx(gd, x1, 3, 1);

             } else {

                 // EX is memory

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, (0xfff<<3)-8, 7, rex, NULL, 0, 0);

                 LDRx_U12(x1, ed, fixedaddress+0);

                 LSRx(x1, x1, 31);

@@ -1034,6 +1052,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 } else {

                     VSTR32_U12(v0, ed, fixedaddress);

                 }

+                SMWRITE2();

             }

             break;

         case 0x7F:

@@ -1046,6 +1065,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                 VSTR64_U12(v0, ed, fixedaddress);

+                SMWRITE2();

             }

             break;

 

@@ -1096,6 +1116,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {                                    \

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, NULL, 0, 0); \

                 STRB_U12(x3, ed, fixedaddress);         \

+                SMWRITE();                              \

             }

 

         GOCOND(0x90, "SET", "Eb");

@@ -1115,6 +1136,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(MODREG) {

                 ed = xRAX+(nextop&7)+(rex.b<<3);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);

                 ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5)

                 ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4;

@@ -1162,6 +1184,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 ed = xRAX+(nextop&7)+(rex.b<<3);

                 wback = 0;

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);

                 ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5)

                 ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4;

@@ -1187,6 +1210,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             CSELxw(ed, ed, x4, cNE);

             if(wback) {

                 STRxw_U12(ed, wback, fixedaddress);

+                SMWRITE();

             }

             break;

         case 0xAC:

@@ -1216,12 +1240,15 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             nextop = F8;

             if((nextop&0xF8)==0xE8) {

                 INST_NAME("LFENCE");

+                SMDMB();

             } else

             if((nextop&0xF8)==0xF0) {

                 INST_NAME("MFENCE");

+                SMDMB();

             } else

             if((nextop&0xF8)==0xF8) {

                 INST_NAME("SFENCE");

+                SMDMB();

             } else {

                 switch((nextop>>3)&7) {

                     case 0:

@@ -1320,6 +1347,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 ed = xRAX+(nextop&7)+(rex.b<<3);

                 wback = 0;

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);

                 ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5)

                 ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4;

@@ -1345,6 +1373,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             CSELxw(ed, ed, x4, cEQ);

             if(wback) {

                 STRxw_U12(ed, wback, fixedaddress);

+                SMWRITE();

             }

             break;

 

@@ -1363,6 +1392,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 }

                 UBFXxw(gd, eb1, eb2*8, 8);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, NULL, 0, 0);

                 LDRB_U12(gd, ed, fixedaddress);

             }

@@ -1375,6 +1405,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 ed = xRAX+(nextop&7)+(rex.b<<3);

                 UBFXxw(gd, ed, 0, 16);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, NULL, 0, 0);

                 LDRH_U12(gd, ed, fixedaddress);

             }

@@ -1391,6 +1422,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     if(MODREG) {

                         ed = xRAX+(nextop&7)+(rex.b<<3);

                     } else {

+                        SMREAD();

                         addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 1);

                         LDRxw_U12(x1, wback, fixedaddress);

                         ed = x1;

@@ -1407,6 +1439,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         ed = xRAX+(nextop&7)+(rex.b<<3);

                         wback = 0;

                     } else {

+                        SMREAD();

                         addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 1);

                         LDRxw_U12(x1, wback, fixedaddress);

                         ed = x1;

@@ -1419,6 +1452,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     EORxw_REG_LSL(ed, ed, x4, u8);

                     if(wback) {

                         STRxw_U12(ed, wback, fixedaddress);

+                        SMWRITE();

                     }

                     MARK3;

                     break;

@@ -1430,6 +1464,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         ed = xRAX+(nextop&7)+(rex.b<<3);

                         wback = 0;

                     } else {

+                        SMREAD();

                         addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 1);

                         LDRxw_U12(x1, wback, fixedaddress);

                         ed = x1;

@@ -1442,6 +1477,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     EORxw_REG_LSL(ed, ed, x4, u8);

                     if(wback) {

                         STRxw_U12(ed, wback, fixedaddress);

+                        SMWRITE();

                     }

                     MARK3;

                     break;

@@ -1453,6 +1489,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         ed = xRAX+(nextop&7)+(rex.b<<3);

                         wback = 0;

                     } else {

+                        SMREAD();

                         addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 1);

                         LDRxw_U12(x1, wback, fixedaddress);

                         ed = x1;

@@ -1464,6 +1501,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     EORxw_REG_LSL(ed, ed, x4, u8);

                     if(wback) {

                         STRxw_U12(ed, wback, fixedaddress);

+                        SMWRITE();

                     }

                     MARK3;

                     break;

@@ -1481,6 +1519,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 ed = xRAX+(nextop&7)+(rex.b<<3);

                 wback = 0;

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);

                 ASRxw(x1, gd, 5+rex.w); // r1 = (gd>>5)

                 ADDx_REG_LSL(x3, wback, x1, 2+rex.w); //(&ed)+=r1*4;

@@ -1505,6 +1544,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             EORxw_REG(ed, ed, x4);

             if(wback) {

                 STRxw_U12(ed, wback, fixedaddress);

+                SMWRITE();

             }

             break;

         case 0xBC:

@@ -1553,6 +1593,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 }

                 SBFXxw(gd, wback, wb2, 8);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0xfff, 0, rex, NULL, 0, 0);

                 LDRSBxw_U12(gd, ed, fixedaddress);

             }

@@ -1565,6 +1606,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 ed = xRAX+(nextop&7)+(rex.b<<3);

                 SXTHxw(gd, ed);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x3, &fixedaddress, 0xfff<<1, 1, rex, NULL, 0, 0);

                 LDRSHxw_U12(gd, ed, fixedaddress);

             }

@@ -1663,6 +1705,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         VMOVeS(d0, i, v1, (u8>>(i*2)&3));

                     }

                 } else {

+                    SMREAD();

                     for(int i=2; i<4; ++i) {

                         ADDx_U12(x2, ed, (u8>>(i*2)&3)*4);

                         VLD1_32(d0, i, x2);

diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index ff735cff..6d4ac7d6 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -91,6 +91,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             } else {
                                 grab_segdata(dyn, addr, ninst, x4, seg);
                                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                                SMREAD();
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);
                                 ADDx_REG(x4, x4, ed);
                                 VLDR64_U12(v0, x4, fixedaddress); // upper part reseted
@@ -107,6 +108,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             } else {
                                 grab_segdata(dyn, addr, ninst, x4, seg);
                                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                                SMREAD();
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);
                                 ADDx_REG(x4, x4, ed);
                                 VLDR32_U12(v0, x4, fixedaddress);
@@ -132,6 +134,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);
                                 ADDx_REG(x4, x4, ed);
                                 VSTR128_U12(v0, x4, fixedaddress);
+                                SMWRITE2();
                             }
                             break;
                         case 1:
@@ -148,6 +151,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);
                                 ADDx_REG(x4, x4, ed);
                                 VSTR64_U12(v0, x4, fixedaddress);
+                                SMWRITE2();
                             }
                             break;
                         case 2:
@@ -163,6 +167,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);
                                 ADDx_REG(x4, x4, ed);
                                 VSTR32_U12(v0, x4, fixedaddress);
+                                SMWRITE2();
                             }
                             break;
                         default:
@@ -182,6 +187,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 VMOVQ(v0, v1);
                             } else {
                                 grab_segdata(dyn, addr, ninst, x4, seg);
+                                SMREAD();
                                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);
                                 ADDx_REG(x4, x4, ed);
                                 VLDR128_U12(v0, ed, fixedaddress);
@@ -242,6 +248,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         }
                         UBFXxw(gd, eb1, eb2*8, 8);
                     } else {
+                        SMREAD();
                         addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                         LDRB_REG(gd, ed, x4);
                     }
@@ -301,6 +308,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SXTWx(gd, xRAX+(nextop&7)+(rex.b<<3));
                 } else {                    // mem <= reg
                     grab_segdata(dyn, addr, ninst, x4, seg);
+                    SMREAD();
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                     LDRSW_REG(gd, ed, x4);
                 }
@@ -309,6 +317,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MOVw_REG(gd, xRAX+(nextop&7)+(rex.b<<3));
                 } else {                    // mem <= reg
                     grab_segdata(dyn, addr, ninst, x4, seg);
+                    SMREAD();
                     addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                     LDRw_REG(gd, ed, x4);
                 }
@@ -510,6 +519,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 }
             } else {
                 grab_segdata(dyn, addr, ninst, x4, seg);
+                SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                 LDRB_REG(x4, wback, x4);
                 ed = x4;
@@ -526,6 +536,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {                    // mem <= reg
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                 STRxw_REG(gd, ed, x4);
+                SMWRITE2();
             }
             break;
 
@@ -537,6 +548,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(MODREG) {   // reg <= reg
                 MOVxw_REG(gd, xRAX+(nextop&7)+(rex.b<<3));
             } else {                    // mem <= reg
+                SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                 LDRxw_REG(gd, ed, x4);
             }
@@ -563,6 +575,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 u8 = F8;
                 MOV32w(x3, u8);
                 STRB_REG(x3, ed, x4);
+                SMWRITE2();
             }
             break;
         case 0xC7:
@@ -578,6 +591,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 i64 = F32S;
                 MOV64xw(x3, i64);
                 STRxw_REG(x3, ed, x4);
+                SMWRITE2();
             }
             break;
 
@@ -967,7 +981,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 4: // JMP Ed
                     INST_NAME("JMP Ed");
-                    BARRIER(1);
+                    BARRIER(BARRIER_FLOAT);
                     GETEDOx(x6, 0);
                     jump_to_next(dyn, 0, ed, ninst);
                     *need_epilog = 0;
diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index f46da3f9..493544ea 100755
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -444,7 +444,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 GETGD;

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0);

-                DMB_ISH();

+                SMDMB();

                 TSTx_mask(ed, 1, 0, 0);    // mask=1

                 B_MARK(cNE);

                 MARKLOCK;

@@ -456,7 +456,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 LDRH_U12(x1, ed, 0);

                 STRH_U12(gd, ed, 0);

                 MARK2;

-                DMB_ISH();

+                SMDMB();

                 BFIx(gd, x1, 0, 16);

             }

             break;

@@ -473,10 +473,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, &lock, 0, 0);

                 STRH_U12(gd, ed, fixedaddress);

-                if(lock || (box64_dynarec_strongmem && 

-                 (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) {

-                    DMB_ISH();

-                }

+                SMWRITELOCK(lock);

             }

             break;

         case 0x8B:

@@ -490,10 +487,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 }

             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, &lock, 0, 0);

-                if(lock || (box64_dynarec_strongmem && 

-                 (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) {

-                    DMB_ISH();

-                }

+                SMREADLOCK(lock);

                 LDRH_U12(x1, ed, fixedaddress);

                 BFIx(gd, x1, 0, 16);

             }

@@ -537,6 +531,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             u64 = F64;

             MOV64x(x1, u64);

             STRH_U12(xRAX, x1, 0);

+            SMWRITE();

             break;

 

         case 0xA5:

@@ -714,10 +709,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 u16 = F16;

                 MOV32w(x1, u16);

                 STRH_U12(x1, ed, fixedaddress);

-                if(lock || (box64_dynarec_strongmem && 

-                 (dyn->insts[ninst].x64.barrier || box64_dynarec_strongmem>1 || (dyn->insts[ninst+1].x64.barrier || dyn->insts[ninst+1].x64.jmp)))) {

-                    DMB_ISH();

-                }

+                SMWRITELOCK(lock);

             }

             break;

 

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 41c583da..cad47b37 100755
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -27,6 +27,7 @@
     if(MODREG) {                                                                                        \

         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w);                                      \

     } else {                                                                                            \

+        SMREAD();                                                                                       \

         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, D);  \

         a = fpu_get_scratch(dyn);                                                                       \

         VLDR128_U12(a, ed, fixedaddress);                                                               \

@@ -81,6 +82,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

                 VMOVQ(v0, v1);

             } else {

+                SMREAD();

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VLDR128_U12(v0, ed, fixedaddress);

@@ -97,6 +99,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VSTR128_U12(v0, ed, fixedaddress);

+                SMWRITE2();

             }

             break;

         case 0x12:

@@ -108,6 +111,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 DEFAULT;

                 return addr;

             }

+            SMREAD();

             addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);

             VLD1_64(v0, 0, ed);

             break;

@@ -122,6 +126,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             }

             addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);

             VST1_64(v0, 0, ed);

+            SMWRITE2();

             break;

         case 0x14:

             INST_NAME("UNPCKLPD Gx, Ex");

@@ -131,6 +136,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);

                 VMOVeD(v0, 1, v1, 0);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);

                 VLD1_64(v0, 1, ed);

             }

@@ -144,6 +150,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);

                 VMOVeD(v0, 1, v1, 1);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);

                 v1 = fpu_get_scratch(dyn);

                 ADDx_U12(ed, ed, 8);

@@ -159,6 +166,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 DEFAULT;

                 return addr;

             }

+            SMREAD();

             addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);

             VLD1_64(v0, 1, ed);

             break;

@@ -173,6 +181,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             }

             addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);

             VST1_64(v0, 1, ed);

+            SMWRITE2();

             break;

 

         case 0x1F:

@@ -191,6 +200,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

                 VMOVQ(v0, v1);

             } else {

+                SMREAD();

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VLDR128_U12(v0, ed, fixedaddress);

@@ -208,6 +218,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VSTR128_U12(v0, ed, fixedaddress);

+                SMWRITE2();

             }

             break;

 

@@ -772,6 +783,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             if(MODREG) {                            \

                 ed = xRAX+(nextop&7)+(rex.b<<3);    \

             } else {                                \

+                SMREAD();                           \

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<1, 1, rex, NULL, 0, 0); \

                 LDRH_U12(x1, ed, fixedaddress);     \

                 ed = x1;                            \

@@ -1113,6 +1125,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 VMOVQ(v0, v1);

             } else {

                 GETGX_empty(v0);

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VLDR128_U12(v0, ed, fixedaddress);

             }

@@ -1187,6 +1200,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 }

             } else {

                 GETGX_empty(v0);

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 1);

                 u8 = F8;

                 if (u8) {

@@ -1218,6 +1232,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         }

                         if(!MODREG) {

                             VSTR128_U12(q0, ed, fixedaddress);

+                            SMWRITE2();

                         }

                     }

                     break;

@@ -1231,6 +1246,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     }

                     if(!MODREG) {

                         VSTR128_U12(q0, ed, fixedaddress);

+                        SMWRITE2();

                     }

                     break;

                 case 6:

@@ -1245,6 +1261,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         }

                         if(!MODREG) {

                             VSTR128_U12(q0, ed, fixedaddress);

+                            SMWRITE2();

                         }

                     }

                     break;

@@ -1268,6 +1285,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         }

                         if(!MODREG) {

                             VSTR128_U12(q0, ed, fixedaddress);

+                            SMWRITE2();

                         }

                     }

                     break;

@@ -1281,6 +1299,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     }

                     if(!MODREG) {

                         VSTR128_U12(q0, ed, fixedaddress);

+                        SMWRITE2();

                     }

                     break;

                 case 6:

@@ -1295,6 +1314,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         }

                         if(!MODREG) {

                             VSTR128_U12(q0, ed, fixedaddress);

+                            SMWRITE2();

                         }

                     }

                     break;

@@ -1317,6 +1337,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         }

                         if(!MODREG) {

                             VSTR128_U12(q0, ed, fixedaddress);

+                            SMWRITE2();

                         }

                     }

                     break;

@@ -1334,6 +1355,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         }

                         if(!MODREG) {

                             VSTR128_U12(q0, ed, fixedaddress);

+                            SMWRITE2();

                         }

                     }

                     break;

@@ -1349,6 +1371,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         }

                         if(!MODREG) {

                             VSTR128_U12(q0, ed, fixedaddress);

+                            SMWRITE2();

                         }

                     }

                     break;

@@ -1366,6 +1389,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         }

                         if(!MODREG) {

                             VSTR128_U12(q0, ed, fixedaddress);

+                            SMWRITE2();

                         }

                     }

                     break;

@@ -1431,6 +1455,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 } else {

                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                     VSTR64_U12(v0, ed, fixedaddress);

+                    SMWRITE2();

                 }

             } else {

                 if(MODREG) {

@@ -1439,6 +1464,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 } else {

                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);

                     VSTR32_U12(v0, ed, fixedaddress);

+                    SMWRITE2();

                 }

             }

             break;

@@ -1452,6 +1478,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VSTR128_U12(v0, ed, fixedaddress);

+                SMWRITE2();

             }

             break;

 

@@ -1464,6 +1491,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             if(MODREG) {

                 ed = xRAX+(nextop&7)+(rex.b<<3);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, NULL, 0, 0);

                 SBFXw(x1, gd, 4, 12);   // r1 = (gw>>4)

                 ADDx_REG_LSL(x3, wback, x1, 1); //(&ed)+=r1*2;

@@ -1566,6 +1594,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 ed = xRAX+(nextop&7)+(rex.b<<3);

                 wback = 0;

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, NULL, 0, 0);

                 SBFXw(x4, gd, 4, 12);   // r1 = (gw>>4)

                 ADDx_REG_LSL(x3, wback, x4, 1); //(&ed)+=r1*2;

@@ -1583,6 +1612,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             EORx_REG(ed, ed, x1);

             if(wback) {

                 STRH_U12(ed, wback, fixedaddress);

+                SMWRITE();

             }

             break;

 

@@ -1600,6 +1630,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 }

                 UBFXxw(x1, eb1, eb2*8, 8);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, NULL, 0, 0);

                 LDRB_U12(x1, ed, fixedaddress);

             }

@@ -1613,6 +1644,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 eb1 = xRAX+(nextop&7)+(rex.b<<3);

                 UBFXxw(x1, eb1, 0, 16);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff>>1, 1, rex, NULL, 0, 0);

                 LDRH_U12(x1, ed, fixedaddress);

             }

@@ -1631,6 +1663,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 ed = xRAX+(nextop&7)+(rex.b<<3);

                 wback = 0;

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<2, (1<<2)-1, rex, NULL, 0, 0);

                 SBFXw(x4, gd, 4, 12);   // r1 = (gw>>4)

                 ADDx_REG_LSL(x3, wback, x4, 1); //(&ed)+=r1*2;

@@ -1647,6 +1680,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             EORx_REG(ed, ed, x1);

             if(wback) {

                 STRH_U12(ed, wback, fixedaddress);

+                SMWRITE();

             }

             break;

         case 0xBC:

@@ -1699,6 +1733,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 }

                 SBFXw(x1, eb1, eb2, 8);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff, 0, rex, NULL, 0, 0);

                 LDRSBw_U12(x1, ed, fixedaddress);

             }

@@ -1746,6 +1781,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 ed = xRAX+(nextop&7)+(rex.b<<3);

                 VMOVQHfrom(v0, u8, ed);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, NULL, 0, 1);

                 u8 = (F8)&7;

                 VLD1_16(v0, u8, wback);

@@ -1760,6 +1796,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 u8 = (F8)&7;

                 VMOVHto(gd, v0, u8);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, NULL, 0, 1);

                 u8 = (F8)&7;

                 LDRH_U12(gd, wback, u8*2);

@@ -1858,6 +1895,7 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                 VSTR64_U12(v0, ed, fixedaddress);

+                SMWRITE2();

             }

             break;

         case 0xD7:

diff --git a/src/dynarec/arm64/dynarec_arm64_6664.c b/src/dynarec/arm64/dynarec_arm64_6664.c
index 56697b22..1ec70948 100644
--- a/src/dynarec/arm64/dynarec_arm64_6664.c
+++ b/src/dynarec/arm64/dynarec_arm64_6664.c
@@ -65,6 +65,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         v1 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0);
                     } else {
                         grab_segdata(dyn, addr, ninst, x4, _FS);
+                        SMREAD();
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                         v1 = fpu_get_scratch(dyn);                                                                       \
                         VLDR128_REG(v1, ed, x4);
@@ -84,6 +85,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     grab_segdata(dyn, addr, ninst, x4, _FS);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                     VSTR64_REG(v0, ed, x4);
+                    SMWRITE();
                 }
                 break;
 
@@ -113,6 +115,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 } else {
                     STRH_REG(gd, ed, x4);
                 }
+                SMWRITE();
             }
             break;
 
@@ -131,6 +134,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 }
             } else {                    // mem <= reg
                 grab_segdata(dyn, addr, ninst, x4, _FS);
+                SMREAD();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, NULL, 0, 0);
                 if(rex.w) {
                     LDRx_REG(gd, ed, x4);
diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c
index 01a739b6..bfdf24ee 100644
--- a/src/dynarec/arm64/dynarec_arm64_66f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_66f0.c
@@ -57,7 +57,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
             GETGW(x5);
-            DMB_ISH();
+            SMDMB();
             if(MODREG) {
                 ed = xRAX+(nextop&7)+(rex.b<<3);
                 UXTHw(x6, ed);
@@ -71,7 +71,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 STLXRH(x3, x1, wback);
                 CBNZx_MARKLOCK(x3);
             }
-            DMB_ISH();
+            SMDMB();
             break;
 
         case 0x0F:
@@ -83,7 +83,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     nextop = F8;
                     GETGD;
-                    DMB_ISH();
+                    SMDMB();
                     UXTHw(x6, xRAX);
                     if(MODREG) {
                         ed = xRAX+(nextop&7)+(rex.b<<3);
@@ -122,7 +122,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     // Common part (and fallback for EAX != Ed)
                     UFLAG_IF {emit_cmp32(dyn, ninst, rex, x6, x1, x3, x4, x5);}
                     BFIx(xRAX, x1, 0, 16);
-                    DMB_ISH();
+                    SMDMB();
                     break;
                     
                 case 0xC1:
@@ -131,7 +131,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     nextop = F8;
                     gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3);
                     UXTHx(x5, gd);
-                    DMB_ISH();
+                    SMDMB();
                     if(MODREG) {
                         ed = xRAX+(nextop&7)+(rex.b<<3);
                         BFIx(gd, ed, 0, 16);
@@ -150,7 +150,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         }
                         BFIx(gd, x1, 0, 16);
                     }
-                    DMB_ISH();
+                    SMDMB();
                     break;
 
                 default:
@@ -161,7 +161,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         case 0x81:
         case 0x83:
             nextop = F8;
-            DMB_ISH();
+            SMDMB();
             switch((nextop>>3)&7) {
                 case 0: //ADD
                     if(opcode==0x81) {
@@ -355,7 +355,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     }
                     break;
             }
-            DMB_ISH();
+            SMDMB();
             break;
 
             case 0xFF:
@@ -365,7 +365,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     case 0: // INC Ew
                         INST_NAME("LOCK INC Ew");
                         SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
-                        DMB_ISH();
+                        SMDMB();
                         if(MODREG) {
                             ed = xRAX+(nextop&7)+(rex.b<<3);
                             UXTHw(x6, ed);
@@ -379,12 +379,12 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                             STLXRH(x3, x1, wback);
                             CBNZx_MARKLOCK(x3);
                         }
-                        DMB_ISH();
+                        SMDMB();
                         break;
                     case 1: //DEC Ew
                         INST_NAME("LOCK DEC Ew");
                         SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
-                        DMB_ISH();
+                        SMDMB();
                         if(MODREG) {
                             ed = xRAX+(nextop&7)+(rex.b<<3);
                             UXTHw(x6, ed);
@@ -398,7 +398,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                             STLXRH(x3, x1, wback);
                             CBNZx_MARKLOCK(x3);
                         }
-                        DMB_ISH();
+                        SMDMB();
                         break;
                     default:
                         DEFAULT;
diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index aeae1dd3..1b35d645 100755
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -186,6 +186,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             s0 = sse_get_reg(dyn, ninst, x1, (nextop&7) + (rex.b<<3), 0);

                         } else {

                             s0 = fpu_get_scratch(dyn);

+                            SMREAD();

                             addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);

                             VLDR32_U12(s0, ed, fixedaddress);

                         }

@@ -204,6 +205,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         VMOV(v0, v1);

                     } else {

                         v0 = mmx_get_reg_empty(dyn, ninst, x1, x2, x3, gd);

+                        SMREAD();

                         addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                         VLDR64_U12(v0, ed, fixedaddress);

                     }

@@ -219,6 +221,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     } else {

                         addr = geted32(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                         VSTR64_U12(v0, ed, fixedaddress);

+                        SMWRITE();

                     }

                     break;

 

@@ -633,7 +636,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {                    // mem <= reg

                 addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 0);

                 STRxw_U12(gd, ed, fixedaddress);

-                if(lock) {DMB_ISH();}

+                SMWRITELOCK(lock);

             }

             break;

 

@@ -645,7 +648,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 MOVxw_REG(gd, xRAX+(nextop&7)+(rex.b<<3));

             } else {

                 addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 0);

-                if(lock) {DMB_ISH();}

+                SMREADLOCK(lock);

                 LDRxw_U12(gd, ed, fixedaddress);

             }

             break;

@@ -750,7 +753,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 i64 = F32S;

                 MOV64xw(x3, i64);

                 STRxw_U12(x3, ed, fixedaddress);

-                if(lock) {DMB_ISH();}

+                SMWRITELOCK(lock);

             }

             break;

 

diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index d13c91b2..b333d079 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -58,7 +58,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             INST_NAME("LOCK ADD Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
-            DMB_ISH();
+            SMDMB();
             GETGB(x2);
             if((nextop&0xC0)==0xC0) {
                 if(rex.rex) {
@@ -80,14 +80,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 STLXRB(x4, x1, wback);
                 CBNZx_MARKLOCK(x4);
             }
-            DMB_ISH();
+            SMDMB();
             break;
         case 0x01:
             INST_NAME("LOCK ADD Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
             GETGD;
-            DMB_ISH();
+            SMDMB();
             if((nextop&0xC0)==0xC0) {
                 ed = xRAX+(nextop&7)+(rex.b<<3);
                 emit_add32(dyn, ninst, rex, ed, gd, x3, x4);
@@ -99,14 +99,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 STLXRxw(x3, x1, wback);
                 CBNZx_MARKLOCK(x3);
             }
-            DMB_ISH();
+            SMDMB();
             break;
 
         case 0x08:
             INST_NAME("LOCK OR Eb, Gb");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
-            DMB_ISH();
+            SMDMB();
             GETGB(x2);
             if((nextop&0xC0)==0xC0) {
                 if(rex.rex) {
@@ -128,14 +128,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 STLXRB(x4, x1, wback);
                 CBNZx_MARKLOCK(x4);
             }
-            DMB_ISH();
+            SMDMB();
             break;
         case 0x09:
             INST_NAME("LOCK OR Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
             GETGD;
-            DMB_ISH();
+            SMDMB();
             if(MODREG) {
                 ed = xRAX+(nextop&7)+(rex.b<<3);
                 emit_or32(dyn, ninst, rex, ed, gd, x3, x4);
@@ -147,7 +147,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 STLXRxw(x3, x1, wback);
                 CBNZx_MARKLOCK(x3);
             }
-            DMB_ISH();
+            SMDMB();
             break;
 
         case 0x0F:
@@ -160,7 +160,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     nextop = F8;
                     GETGB(x1);
                     UBFXx(x6, xRAX, 0, 8);
-                    DMB_ISH();
+                    SMDMB();
                     if(MODREG) {
                         if(rex.rex) {
                             wback = xRAX+(nextop&7)+(rex.b<<3);
@@ -196,14 +196,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         UFLAG_IF {emit_cmp32(dyn, ninst, rex, x6, x2, x3, x4, x5);}
                         BFIx(xRAX, x2, 0, 8);    // upper par of RAX will be erase on 32bits, no mater what
                     }
-                    DMB_ISH();
+                    SMDMB();
                     break;
                 case 0xB1:
                     INST_NAME("LOCK CMPXCHG Ed, Gd");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     nextop = F8;
                     GETGD;
-                    DMB_ISH();
+                    SMDMB();
                     if(MODREG) {
                         ed = xRAX+(nextop&7)+(rex.b<<3);
                         wback = 0;
@@ -244,7 +244,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, x1, x3, x4, x5);}
                         MOVxw_REG(xRAX, x1);    // upper par of RAX will be erase on 32bits, no mater what
                     }
-                    DMB_ISH();
+                    SMDMB();
                     break;
 
                 case 0xC1:
@@ -252,7 +252,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     nextop = F8;
                     GETGD;
-                    DMB_ISH();
+                    SMDMB();
                     if(MODREG) {
                         ed = xRAX+(nextop&7)+(rex.b<<3);
                         MOVxw_REG(x1, ed);
@@ -284,7 +284,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         }
                         MOVxw_REG(gd, x1);
                     }
-                    DMB_ISH();
+                    SMDMB();
                     break;
 
                 case 0xC7:
@@ -292,7 +292,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SETFLAGS(X_ZF, SF_SUBSET);
                     nextop = F8;
                     addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0);
-                    DMB_ISH();
+                    SMDMB();
                     MARKLOCK;
                     LDAXPxw(x2, x3, wback);
                     CMPSxw_REG(xRAX, x2);
@@ -308,7 +308,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MOVxw_REG(xRDX, x3);
                     MOV32w(x1, 0);
                     MARK3;
-                    DMB_ISH();
+                    SMDMB();
                     BFIw(xFlags, x1, F_ZF, 1);
                     break;
 
@@ -322,7 +322,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
             GETGD;
-            DMB_ISH();
+            SMDMB();
             if(MODREG) {
                 ed = xRAX+(nextop&7)+(rex.b<<3);
                 emit_and32(dyn, ninst, rex, ed, gd, x3, x4);
@@ -334,7 +334,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 STLXRxw(x3, x1, wback);
                 CBNZx_MARKLOCK(x3);
             }
-            DMB_ISH();
+            SMDMB();
             break;
             
         case 0x29:
@@ -342,7 +342,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             SETFLAGS(X_ALL, SF_SET_PENDING);
             nextop = F8;
             GETGD;
-            DMB_ISH();
+            SMDMB();
             if(MODREG) {
                 ed = xRAX+(nextop&7)+(rex.b<<3);
                 emit_sub32(dyn, ninst, rex, ed, gd, x3, x4);
@@ -354,7 +354,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 STLXRxw(x3, x1, wback);
                 CBNZx_MARKLOCK(x3);
             }
-            DMB_ISH();
+            SMDMB();
             break;
 
         case 0x66:
@@ -362,7 +362,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 
         case 0x80:
             nextop = F8;
-            DMB_ISH();
+            SMDMB();
             switch((nextop>>3)&7) {
                 case 0: //ADD
                     INST_NAME("ADD Eb, Ib");
@@ -521,12 +521,12 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 default:
                     DEFAULT;
             }
-            DMB_ISH();
+            SMDMB();
             break;
         case 0x81:
         case 0x83:
             nextop = F8;
-            DMB_ISH();
+            SMDMB();
             switch((nextop>>3)&7) {
                 case 0: //ADD
                     if(opcode==0x81) {
@@ -550,7 +550,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         emit_add32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
                         STLXRxw(x3, x1, wback);
                         CBNZx_MARKLOCK(x3);
-                        DMB_ISH();
+                        SMDMB();
                         B_NEXT_nocond;
                         MARK;   // unaligned! also, not enough 
                         LDRxw_U12(x1, wback, 0);
@@ -658,7 +658,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5);
                         STLXRxw(x3, x1, wback);
                         CBNZx_MARKLOCK(x3);
-                        DMB_ISH();
+                        SMDMB();
                         B_NEXT_nocond;
                         MARK;   // unaligned! also, not enough 
                         LDRxw_U12(x1, wback, 0);
@@ -703,7 +703,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     break;
             }
-            DMB_ISH();
+            SMDMB();
             break;
             
         case 0x86:
@@ -726,7 +726,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 BFIx(gb1, x1, gb2*8, 8);
                 BFIx(eb1, x4, eb2*8, 8);
             } else {
-                DMB_ISH();
+                SMDMB();
                 GETGB(x4);
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0);
                 MARKLOCK;
@@ -735,7 +735,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 // do the swap 14 -> strb(ed), 1 -> gd
                 STLXRB(x3, x4, ed);
                 CBNZx_MARKLOCK(x3);
-                DMB_ISH();
+                SMDMB();
                 BFIx(gb1, x1, gb2*8, 8);
             }
             break;
@@ -750,7 +750,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 MOVxw_REG(ed, x1);
             } else {
                 GETGD;
-                DMB_ISH();
+                SMDMB();
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, 0, 0, rex, LOCK_LOCK, 0, 0);
                 TSTx_mask(ed, 1, 0, 1+rex.w);    // mask=3 or 7
                 B_MARK(cNE);
@@ -763,7 +763,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 LDRxw_U12(x1, ed, 0);
                 STRxw_U12(gd, ed, 0);
                 MARK2;
-                DMB_ISH();
+                SMDMB();
                 MOVxw_REG(gd, x1);
             }
             break;
@@ -775,7 +775,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 0: // INC Ed
                     INST_NAME("LOCK INC Ed");
                     SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
-                    DMB_ISH();
+                    SMDMB();
                     if(MODREG) {
                         ed = xRAX+(nextop&7)+(rex.b<<3);
                         emit_inc32(dyn, ninst, rex, ed, x3, x4);
@@ -798,12 +798,12 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         CBNZw_MARK(x3);
                         STRxw_U12(x1, wback, 0);
                     }
-                    DMB_ISH();
+                    SMDMB();
                     break;
                 case 1: //DEC Ed
                     INST_NAME("LOCK DEC Ed");
                     SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
-                    DMB_ISH();
+                    SMDMB();
                     if(MODREG) {
                         ed = xRAX+(nextop&7)+(rex.b<<3);
                         emit_dec32(dyn, ninst, rex, ed, x3, x4);
@@ -826,7 +826,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         CBNZw_MARK(x3);
                         STRxw_U12(x1, wback, 0);
                     }
-                    DMB_ISH();
+                    SMDMB();
                     break;
                 default:
                     DEFAULT;
diff --git a/src/dynarec/arm64/dynarec_arm64_f20f.c b/src/dynarec/arm64/dynarec_arm64_f20f.c
index 7cb87af3..ff69ad54 100755
--- a/src/dynarec/arm64/dynarec_arm64_f20f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f20f.c
@@ -27,6 +27,7 @@
     if(MODREG) {                                                                                        \

         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w);                                      \

     } else {                                                                                            \

+        SMREAD();                                                                                       \

         a = fpu_get_scratch(dyn);                                                                       \

         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, D);   \

         VLDR64_U12(a, ed, fixedaddress);                                                                \

@@ -81,6 +82,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 d0 = sse_get_reg(dyn, ninst, x1, ed, 0);

                 VMOVeD(v0, 0, d0, 0);

             } else {

+                SMREAD();

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                 VLDR64_U12(v0, ed, fixedaddress); // upper part reseted

@@ -98,6 +100,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                 VSTR64_U12(v0, ed, fixedaddress);

+                SMWRITE2();

             }

             break;

         case 0x12:

@@ -109,6 +112,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

                 VMOVeD(v0, 0, d0, 0);

             } else {

+                SMREAD();

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                 VLDR64_U12(v0, ed, fixedaddress);

@@ -413,6 +417,7 @@ uintptr_t dynarec64_F20F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 VMOVQ(v0, v1);

             } else {

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 7, rex, NULL, 0, 0);

                 VLDR128_U12(v0, ed, fixedaddress);

             }

diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index 9619f260..d407bb72 100755
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -27,6 +27,7 @@
     if(MODREG) {                                                                                        \

         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w);                                      \

     } else {                                                                                            \

+        SMREAD();                                                                                       \

         a = fpu_get_scratch(dyn);                                                                       \

         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, D);   \

         VLDR32_U12(a, ed, fixedaddress);                                                                \

@@ -37,6 +38,7 @@
     if(MODREG) {                                                                                        \

         a = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), w);                                      \

     } else {                                                                                            \

+        SMREAD();                                                                                       \

         addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, D);  \

         a = fpu_get_scratch(dyn);                                                                       \

         VLDR128_U12(a, ed, fixedaddress);                                                               \

@@ -85,6 +87,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 VMOVeS(v0, 0, q0, 0);

             } else {

                 v0 = sse_get_reg_empty(dyn, ninst, x1, gd);

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);

                 VLDR32_U12(v0, ed, fixedaddress);

             }

@@ -100,6 +103,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, 0);

                 VSTR32_U12(v0, ed, fixedaddress);

+                SMWRITE2();

             }

             break;

         case 0x12:

@@ -108,6 +112,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             if(MODREG) {

                 q1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 q1 = fpu_get_scratch(dyn);

                 VLDR128_U12(q1, ed, fixedaddress);

@@ -122,6 +127,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             if(MODREG) {

                 q1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 q1 = fpu_get_scratch(dyn);

                 VLDR128_U12(q1, ed, fixedaddress);

@@ -313,6 +319,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 VMOVQ(v0, v1);

             } else {

                 GETGX_empty(v0);

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VLDR128_U12(v0, ed, fixedaddress);

             }

@@ -348,6 +355,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 FMOVD(v0, v1);

             } else {

                 GETGX_empty(v0);

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                 VLDR64_U12(v0, ed, fixedaddress);

             }

@@ -362,6 +370,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             } else {

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<4, 15, rex, NULL, 0, 0);

                 VSTR128_U12(v0, ed, fixedaddress);

+                SMWRITE2();

             }

             break;

 

@@ -424,6 +433,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 VEORQ(v0, v0, v0);  // usefull?

                 VMOV(v0, v1);

             } else {

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                 VLDR64_U12(v0, ed, fixedaddress);

             }

@@ -437,6 +447,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 v1 = sse_get_reg(dyn, ninst, x1, (nextop&7)+(rex.b<<3), 0);

             } else {

                 v1 = fpu_get_scratch(dyn);

+                SMREAD();

                 addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, 0);

                 VLDR64_U12(v1, ed, fixedaddress);

             }

diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index d38c8078..d3d37a0c 100755
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -389,6 +389,7 @@ void jump_to_epilog(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
         GETIP_(ip);
     }
     TABLE64(x2, (uintptr_t)arm64_epilog);
+    SMEND();
     BR(x2);
 }
 
@@ -426,6 +427,7 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst)
     #ifdef HAVE_TRACE
     //MOVx(x3, 15);    no access to PC reg 
     #endif
+    SMEND();
     BLR(x2); // save LR...
 }
 
@@ -445,6 +447,7 @@ void ret_to_epilog(dynarec_arm_t* dyn, int ninst)
     UBFXx(x3, xRIP, 0, JMPTABL_SHIFT);
     LDRx_REG_LSL3(x2, x2, x3);
     MOVx_REG(x1, xRIP);
+    SMEND();
     BLR(x2); // save LR
     CLEARIP();
 }
@@ -471,6 +474,7 @@ void retn_to_epilog(dynarec_arm_t* dyn, int ninst, int n)
     UBFXx(x3, xRIP, 0, JMPTABL_SHIFT);
     LDRx_REG_LSL3(x2, x2, x3);
     MOVx_REG(x1, xRIP);
+    SMEND();
     BLR(x2); // save LR
     CLEARIP();
 }
@@ -503,6 +507,7 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is64bits)
     MOVx_REG(xRSP, x3);
     // Ret....
     MOV64x(x2, (uintptr_t)arm64_epilog);  // epilog on purpose, CS might have changed!
+    SMEND();
     BR(x2);
     CLEARIP();
 }
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 008c14e5..dcffd85d 100755
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -32,6 +32,30 @@
 #define PK64(a)   *(uint64_t*)(addr+a)
 #define PKip(a)   *(uint8_t*)(ip+a)
 
+// Strong mem emulation helpers
+// A sequence of reads will trigger a DMB on the "first" read if strongmem is greater than 1
+// A sequence of writes will trigger a DMB at the end of the sequence (after the "last" write) if strongmem is non-zero
+// Opcode will read
+#define SMREAD()    if(!dyn->smread && box64_dynarec_strongmem>1) {DMB_ISH(); dyn->smread=1;}
+// Opcode will read, with an optional forced lock
+#define SMREADLOCK(lock)    if(lock) {SMDMB();} else if(!dyn->smread && box64_dynarec_strongmem>1) {DMB_ISH(); dyn->smread=1;}
+// Opcode might read (depends on nextop)
+#define SMMIGHTREAD()   if(!MODREG) {SMREAD();}
+// Opcode has written
+#define SMWRITE()   dyn->smwrite=1
+// Opcode has written (strongmem>1 only)
+#define SMWRITE2()   if(box64_dynarec_strongmem>1) dyn->smwrite=1
+// Opcode has written, with an optional forced lock
+#define SMWRITELOCK(lock)   if(lock) {SMDMB();} else dyn->smwrite=1
+// Opcode might have written (depends on nextop)
+#define SMMIGHTWRITE()   if(!MODREG) {SMWRITE();}
+// Start of sequence
+#define SMSTART()   SMEND()
+// End of sequence
+#define SMEND()     if(dyn->smwrite && box64_dynarec_strongmem) {DMB_ISH();} dyn->smwrite=0; dyn->smread=0;
+// Force a Data memory barrier (for LOCK: prefix)
+#define SMDMB()     DMB_ISH(); if(dyn->smwrite) dyn->smwrite=0; dyn->smread=1
+
 //LOCK_* define
 #define LOCK_LOCK   (int*)1
 
@@ -42,6 +66,7 @@
                     ed = xRAX+(nextop&7)+(rex.b<<3);    \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD()                            \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \
                     LDRxw_U12(x1, wback, fixedaddress); \
                     ed = x1;                            \
@@ -50,6 +75,7 @@
                     ed = xRAX+(nextop&7)+(rex.b<<3);    \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<3, 7, rex, NULL, 0, D); \
                     LDRx_U12(x1, wback, fixedaddress);  \
                     ed = x1;                            \
@@ -58,6 +84,7 @@
                     ed = xEAX+(nextop&7)+(rex.b<<3);    \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL,0, D); \
                     LDRw_U12(x1, wback, fixedaddress);  \
                     ed = x1;                            \
@@ -68,6 +95,7 @@
                     wb = x1;                            \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, D); \
                     LDRSW_U12(x1, wback, fixedaddress); \
                     wb = ed = x1;                       \
@@ -76,6 +104,7 @@
                     ed = xRAX+(nextop&7)+(rex.b<<3);    \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted32(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \
                     LDRxw_U12(x1, wback, fixedaddress); \
                     ed = x1;                            \
@@ -86,6 +115,7 @@
                     wb = x1;                            \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted32(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff<<2, 3, rex, NULL, 0, D); \
                     LDRSW_U12(x1, wback, fixedaddress); \
                     wb = ed = x1;                       \
@@ -95,6 +125,7 @@
                     ed = xRAX+(nextop&7)+(rex.b<<3);    \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \
                     LDRxw_U12(hint, wback, fixedaddress); \
                     ed = hint;                            \
@@ -103,6 +134,7 @@
                     ed = xRAX+(nextop&7)+(rex.b<<3);    \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted32(dyn, addr, ninst, nextop, &wback, (hint==x2)?x1:x2, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \
                     LDRxw_U12(hint, wback, fixedaddress); \
                     ed = hint;                            \
@@ -113,6 +145,7 @@
                     MOVxw_REG(ret, ed);                 \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \
                     ed = ret;                           \
                     LDRxw_U12(ed, wback, fixedaddress); \
@@ -122,31 +155,34 @@
                     MOVxw_REG(ret, ed);                 \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted32(dyn, addr, ninst, nextop, &wback, hint, &fixedaddress, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, D); \
                     ed = ret;                           \
                     LDRxw_U12(ed, wback, fixedaddress); \
                 }
 // Write back ed in wback (if wback not 0)
-#define WBACK       if(wback) {STRxw_U12(ed, wback, fixedaddress);}
+#define WBACK       if(wback) {STRxw_U12(ed, wback, fixedaddress); SMWRITE();}
 // Write back ed in wback (if wback not 0)
-#define WBACKx      if(wback) {STRx_U12(ed, wback, fixedaddress);}
+#define WBACKx      if(wback) {STRx_U12(ed, wback, fixedaddress); SMWRITE();}
 // Write back ed in wback (if wback not 0)
-#define WBACKw      if(wback) {STRw_U12(ed, wback, fixedaddress);}
+#define WBACKw      if(wback) {STRw_U12(ed, wback, fixedaddress); SMWRITE();}
 //GETEDO can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
 #define GETEDO(O, D)   if(MODREG) {                     \
                     ed = xRAX+(nextop&7)+(rex.b<<3);    \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, NULL, 0, D); \
                     LDRxw_REG(x1, wback, O);            \
                     ed = x1;                            \
                 }
-#define WBACKO(O)   if(wback) {STRxw_REG(ed, wback, O);}
+#define WBACKO(O)   if(wback) {STRxw_REG(ed, wback, O); SMWRITE2();}
 //GETEDOx can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
 #define GETEDOx(O, D)  if(MODREG) {                     \
                     ed = xRAX+(nextop&7)+(rex.b<<3);    \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, NULL, 0, D); \
                     LDRx_REG(x1, wback, O);             \
                     ed = x1;                            \
@@ -157,6 +193,7 @@
                     wb = x1;                            \
                     wback = 0;                          \
                 } else {                                \
+                    SMREAD();                           \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0, 0, rex, NULL, 0, D); \
                     LDRSW_REG(x1, wback, O);            \
                     wb = ed = x1;                       \
@@ -176,6 +213,7 @@
                     ed = i;                 \
                     wb1 = 0;                \
                 } else {                    \
+                    SMREAD();               \
                     addr = geted(dyn, addr, ninst, nextop, &wback, w, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, NULL, 0, D); \
                     LDRH_U12(i, wback, fixedaddress); \
                     ed = i;                 \
@@ -188,6 +226,7 @@
                     ed = i;                 \
                     wb1 = 0;                \
                 } else {                    \
+                    SMREAD();               \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, NULL, 0, D); \
                     LDRH_U12(i, wback, fixedaddress); \
                     ed = i;                 \
@@ -200,15 +239,16 @@
                     ed = i;                 \
                     wb1 = 0;                \
                 } else {                    \
+                    SMREAD();               \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff<<1, (1<<1)-1, rex, NULL, 0, D); \
                     LDRSHx_U12(i, wback, fixedaddress);\
                     ed = i;                 \
                     wb1 = 1;                \
                 }
 // Write ed back to original register / memory
-#define EWBACK   if(wb1) {STRH_U12(ed, wback, fixedaddress);} else {BFIx(wback, ed, 0, 16);}
+#define EWBACK   if(wb1) {STRH_U12(ed, wback, fixedaddress); SMWRITE();} else {BFIx(wback, ed, 0, 16);}
 // Write w back to original register / memory
-#define EWBACKW(w)   if(wb1) {STRH_U12(w, wback, fixedaddress);} else {BFIx(wback, w, 0, 16);}
+#define EWBACKW(w)   if(wb1) {STRH_U12(w, wback, fixedaddress); SMWRITE();} else {BFIx(wback, w, 0, 16);}
 // Write back gd in correct register
 #define GWBACK       BFIx((xRAX+((nextop&0x38)>>3)+(rex.r<<3)), gd, 0, 16);
 //GETEB will use i for ed, and can use r3 for wback.
@@ -225,6 +265,7 @@
                     wb1 = 0;                    \
                     ed = i;                     \
                 } else {                        \
+                    SMREAD();                   \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, NULL, 0, D); \
                     LDRB_U12(i, wback, fixedaddress); \
                     wb1 = 1;                    \
@@ -244,6 +285,7 @@
                     wb1 = 0;                    \
                     ed = i;                     \
                 } else {                        \
+                    SMREAD();                   \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0, 0, rex, NULL, 0, D); \
                     ADDx_REG(x3, wback, i);     \
                     if(wback!=x3) wback = x3;   \
@@ -265,6 +307,7 @@
                     wb1 = 0;                    \
                     ed = i;                     \
                 } else {                        \
+                    SMREAD();                   \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, NULL, 0, D); \
                     LDRSBx_U12(i, wback, fixedaddress); \
                     wb1 = 1;                    \
@@ -284,13 +327,14 @@
                     wb1 = 0;                    \
                     ed = i;                     \
                 } else {                        \
+                    SMREAD();                   \
                     addr = geted32(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, NULL, 0, D); \
                     LDRB_U12(i, wback, fixedaddress); \
                     wb1 = 1;                    \
                     ed = i;                     \
                 }
 // Write eb (ed) back to original register / memory
-#define EBBACK   if(wb1) {STRB_U12(ed, wback, fixedaddress);} else {BFIx(wback, ed, wb2, 8);}
+#define EBBACK   if(wb1) {STRB_U12(ed, wback, fixedaddress); SMWRITE();} else {BFIx(wback, ed, wb2, 8);}
 //GETGB will use i for gd
 #define GETGB(i)    if(rex.rex) {               \
                         gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3);     \
diff --git a/src/dynarec/arm64/dynarec_arm64_pass3.h b/src/dynarec/arm64/dynarec_arm64_pass3.h
index 2a003711..13b2b323 100755
--- a/src/dynarec/arm64/dynarec_arm64_pass3.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass3.h
@@ -15,7 +15,7 @@
 #define INST_NAME(name) \
     if(box64_dynarec_dump) {\
         printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name); \
-        dynarec_log(LOG_NONE, "%s%p: %d emited opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X", \
+        dynarec_log(LOG_NONE, "%s%p: %d emited opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d", \
             (box64_dynarec_dump>1)?"\e[32m":"", \
             (void*)(dyn->native_start+dyn->insts[ninst].address),  \
             dyn->insts[ninst].size/4,           \
@@ -29,7 +29,8 @@
             dyn->insts[ninst].x64.gen_flags,    \
             dyn->insts[ninst].x64.use_flags,    \
             dyn->insts[ninst].x64.need_before,  \
-            dyn->insts[ninst].x64.need_after);  \
+            dyn->insts[ninst].x64.need_after,   \
+            dyn->smread, dyn->smwrite);         \
         if(dyn->insts[ninst].pred_sz) {         \
             dynarec_log(LOG_NONE, ", pred=");   \
             for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)\
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 37d43372..6abdf829 100755
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -82,7 +82,7 @@ typedef struct instruction_arm64_s {
 } instruction_arm64_t;
 
 typedef struct dynarec_arm_s {
-    instruction_arm64_t *insts;
+    instruction_arm64_t*insts;
     int32_t             size;
     int32_t             cap;
     uintptr_t           start;      // start of the block
@@ -91,7 +91,7 @@ typedef struct dynarec_arm_s {
     uintptr_t           native_start;  // start of the arm code
     size_t              native_size;   // size of emitted arm code
     uintptr_t           last_ip;    // last set IP in RIP (or NULL if unclean state) TODO: move to a cache something
-    uint64_t            *table64;   // table of 64bits value
+    uint64_t*           table64;   // table of 64bits value
     int                 table64size;// size of table (will be appended at end of executable code)
     int                 table64cap;
     uintptr_t           tablestart;
@@ -103,6 +103,8 @@ typedef struct dynarec_arm_s {
     int*                predecessor;// single array of all predecessor
     dynablock_t*        dynablock;
     instsize_t*         instsize;
+    uint8_t             smread;    // for strongmem model emulation
+    uint8_t             smwrite;    // for strongmem model emulation
 } dynarec_arm_t;
 
 void add_next(dynarec_arm_t *dyn, uintptr_t addr);
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index 7ab25a6f..12dbda7d 100755
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -124,6 +124,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
         dyn->n.stack_push = 0;
         dyn->n.swapped = 0;
         NEW_INST;
+        if(dyn->insts[ninst].pred_sz>1) {SMSTART();}
         fpu_reset_scratch(dyn);
         if((dyn->insts[ninst].x64.need_before&~X_PEND) && !dyn->insts[ninst].pred_sz) {
             READFLAGS(dyn->insts[ninst].x64.need_before&~X_PEND);
diff --git a/src/main.c b/src/main.c
index 8ec81411..e1221454 100755
--- a/src/main.c
+++ b/src/main.c
@@ -1267,7 +1267,7 @@ int main(int argc, const char **argv, char **env) {
         printf_log(LOG_INFO, "dota2 detected, forcing dummy crashhandler\n");
         box64_dummy_crashhandler = 1;
         #ifdef DYNAREC
-        box64_dynarec_strongmem = 2;
+        box64_dynarec_strongmem = 1;
         #endif
     }
     // special case for steam-runtime-check-requirements to fake 64bits suport