about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author     ptitSeb <sebastien.chev@gmail.com>    2023-02-12 16:38:12 +0100
committer  ptitSeb <sebastien.chev@gmail.com>    2023-02-12 16:38:12 +0100
commit    96a7d1e7ec0fa1266304779389359804e6108795 (patch)
tree      889bec6cc539ff6a6ea655b56d293c874480700e
parent    2d2a65b616b1bb4250cfdc1d300f7bbc14685a3a (diff)
download  box64-96a7d1e7ec0fa1266304779389359804e6108795.tar.gz
          box64-96a7d1e7ec0fa1266304779389359804e6108795.zip
[DYNAREC] Use custom mutex, improved Signal while FillBlocks64 and atomic handling
-rwxr-xr-x  src/box64context.c                31
-rw-r--r--  src/custommem.c                  108
-rwxr-xr-x  src/dynarec/arm64/arm64_lock.S    53
-rwxr-xr-x  src/dynarec/arm64/arm64_lock.h    11
-rwxr-xr-x  src/dynarec/dynablock.c           20
-rwxr-xr-x  src/dynarec/dynarec_native.c       6
-rwxr-xr-x  src/dynarec/native_lock.h          3
-rwxr-xr-x  src/emu/x64int3.c                 15
-rwxr-xr-x  src/emu/x64run_private.c           6
-rwxr-xr-x  src/emu/x64tls.c                  10
-rwxr-xr-x  src/include/box64context.h        28
-rwxr-xr-x  src/include/threads.h              2
-rwxr-xr-x  src/libtools/signals.c            44
-rwxr-xr-x  src/libtools/threads.c            38
-rwxr-xr-x  src/tools/bridge.c                12
15 files changed, 245 insertions, 142 deletions
diff --git a/src/box64context.c b/src/box64context.c
index 467accfa..67f7f584 100755
--- a/src/box64context.c
+++ b/src/box64context.c
@@ -75,11 +75,20 @@ int unlockMutex()
 {
     int ret = unlockCustommemMutex();
     int i;
+    #ifdef DYNAREC
+    uint32_t tid = (uint32_t)GetTID();
     #define GO(A, B)                    \
+        i = (native_lock_storeifref2_d(&A, 0, tid)==tid); \
+        if(i) {                         \
+            ret|=(1<<B);                \
+        }
+    #else
+        #define GO(A, B)                \
         i = checkUnlockMutex(&A);       \
         if(i) {                         \
             ret|=(1<<B);                \
         }
+    #endif
 
     GO(my_context->mutex_trace, 7)
     #ifdef DYNAREC
@@ -100,7 +109,7 @@ void relockMutex(int locks)
     relockCustommemMutex(locks);
     #define GO(A, B)                    \
         if(locks&(1<<B))                \
-            pthread_mutex_trylock(&A);  \
+            mutex_trylock(&A);          \
 
     GO(my_context->mutex_trace, 7)
     #ifdef DYNAREC
@@ -116,20 +125,24 @@ void relockMutex(int locks)
 
 static void init_mutexes(box64context_t* context)
 {
+#ifndef DYNAREC
     pthread_mutexattr_t attr;
     pthread_mutexattr_init(&attr);
     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
-    pthread_mutex_init(&context->mutex_trace, &attr);
-#ifndef DYNAREC
     pthread_mutex_init(&context->mutex_lock, &attr);
-#else
-    pthread_mutex_init(&context->mutex_dyndump, &attr);
-#endif
+    pthread_mutex_init(&context->mutex_trace, &attr);
     pthread_mutex_init(&context->mutex_tls, &attr);
     pthread_mutex_init(&context->mutex_thread, &attr);
     pthread_mutex_init(&context->mutex_bridge, &attr);
 
     pthread_mutexattr_destroy(&attr);
+#else
+    native_lock_store(&context->mutex_trace, 0);
+    native_lock_store(&context->mutex_tls, 0);
+    native_lock_store(&context->mutex_thread, 0);
+    native_lock_store(&context->mutex_bridge, 0);
+    native_lock_store(&context->mutex_dyndump, 0);
+#endif
 }
 
 static void atfork_child_box64context(void)
@@ -310,15 +323,13 @@ void FreeBox64Context(box64context_t** context)
 
     finiAllHelpers(ctx);
 
-    pthread_mutex_destroy(&ctx->mutex_trace);
 #ifndef DYNAREC
+    pthread_mutex_destroy(&ctx->mutex_trace);
     pthread_mutex_destroy(&ctx->mutex_lock);
-#else
-    pthread_mutex_destroy(&ctx->mutex_dyndump);
-#endif
     pthread_mutex_destroy(&ctx->mutex_tls);
     pthread_mutex_destroy(&ctx->mutex_thread);
     pthread_mutex_destroy(&ctx->mutex_bridge);
+#endif
 
     freeCycleLog(ctx);
 
diff --git a/src/custommem.c b/src/custommem.c
index dafdc61f..ee3d6a22 100644
--- a/src/custommem.c
+++ b/src/custommem.c
@@ -43,8 +43,12 @@ static uintptr_t           box64_jmptbldefault0[1<<JMPTABL_SHIFT];
 // lock addresses
 KHASH_SET_INIT_INT64(lockaddress)
 static kh_lockaddress_t    *lockaddress = NULL;
-#endif
+static uint32_t            mutex_prot;
+static uint32_t            mutex_blocks;
+#else
 static pthread_mutex_t     mutex_prot;
+static pthread_mutex_t     mutex_blocks;
+#endif
 #if defined(PAGE64K)
 #define MEMPROT_SHIFT 16
 #define MEMPROT_SHIFT2 (16+16)
@@ -85,7 +89,6 @@ typedef struct blocklist_s {
 
 #define MMAPSIZE (256*1024)      // allocate 256kb sized blocks
 
-static pthread_mutex_t     mutex_blocks;
 static int                 n_blocks = 0;       // number of blocks for custom malloc
 static int                 c_blocks = 0;       // capacity of blocks for custom malloc
 static blocklist_t*        p_blocks = NULL;    // actual blocks for custom malloc
@@ -299,7 +302,7 @@ void* customMalloc(size_t size)
     // look for free space
     void* sub = NULL;
     size_t fullsize = size+2*sizeof(blockmark_t);
-    pthread_mutex_lock(&mutex_blocks);
+    mutex_lock(&mutex_blocks);
     for(int i=0; i<n_blocks; ++i) {
         if(p_blocks[i].maxfree>=size) {
             size_t rsize = 0;
@@ -312,7 +315,7 @@ void* customMalloc(size_t size)
                     p_blocks[i].first = getNextFreeBlock(sub);
                 if(rsize==p_blocks[i].maxfree)
                     p_blocks[i].maxfree = getMaxFreeBlock(p_blocks[i].block, p_blocks[i].size, p_blocks[i].first);
-                pthread_mutex_unlock(&mutex_blocks);
+                mutex_unlock(&mutex_blocks);
                 return ret;
             }
         }
@@ -345,7 +348,7 @@ void* customMalloc(size_t size)
     // alloc 1st block
     void* ret  = allocBlock(p_blocks[i].block, p, size, NULL);
     p_blocks[i].maxfree = getMaxFreeBlock(p_blocks[i].block, p_blocks[i].size, NULL);
-    pthread_mutex_unlock(&mutex_blocks);
+    mutex_unlock(&mutex_blocks);
     return ret;
 }
 void* customCalloc(size_t n, size_t size)
@@ -361,7 +364,7 @@ void* customRealloc(void* p, size_t size)
         return customMalloc(size);
     size = roundSize(size);
     uintptr_t addr = (uintptr_t)p;
-    pthread_mutex_lock(&mutex_blocks);
+    mutex_lock(&mutex_blocks);
     for(int i=0; i<n_blocks; ++i) {
         if ((addr>(uintptr_t)p_blocks[i].block) 
          && (addr<((uintptr_t)p_blocks[i].block+p_blocks[i].size))) {
@@ -370,17 +373,17 @@ void* customRealloc(void* p, size_t size)
                 if(sub<p_blocks[i].first && p+size<p_blocks[i].first)
                     p_blocks[i].first = getNextFreeBlock(sub);
                 p_blocks[i].maxfree = getMaxFreeBlock(p_blocks[i].block, p_blocks[i].size, p_blocks[i].first);
-                pthread_mutex_unlock(&mutex_blocks);
+                mutex_unlock(&mutex_blocks);
                 return p;
             }
-            pthread_mutex_unlock(&mutex_blocks);
+            mutex_unlock(&mutex_blocks);
             void* newp = customMalloc(size);
             memcpy(newp, p, sizeBlock(sub));
             customFree(p);
             return newp;
         }
     }
-    pthread_mutex_unlock(&mutex_blocks);
+    mutex_unlock(&mutex_blocks);
     if(n_blocks)
         dynarec_log(LOG_NONE, "Warning, block %p not found in p_blocks for realloc, malloc'ing again without free\n", (void*)addr);
     return customMalloc(size);
@@ -390,7 +393,7 @@ void customFree(void* p)
     if(!p)
         return;
     uintptr_t addr = (uintptr_t)p;
-    pthread_mutex_lock(&mutex_blocks);
+    mutex_lock(&mutex_blocks);
     for(int i=0; i<n_blocks; ++i) {
         if ((addr>(uintptr_t)p_blocks[i].block) 
          && (addr<((uintptr_t)p_blocks[i].block+p_blocks[i].size))) {
@@ -400,11 +403,11 @@ void customFree(void* p)
             if(sub<=p_blocks[i].first)
                 p_blocks[i].first = getPrevFreeBlock(n);
             if(p_blocks[i].maxfree < newfree) p_blocks[i].maxfree = newfree;
-            pthread_mutex_unlock(&mutex_blocks);
+            mutex_unlock(&mutex_blocks);
             return;
         }
     }
-    pthread_mutex_unlock(&mutex_blocks);
+    mutex_unlock(&mutex_blocks);
     if(n_blocks)
         dynarec_log(LOG_NONE, "Warning, block %p not found in p_blocks for Free\n", (void*)addr);
 }
@@ -933,7 +936,7 @@ void protectDB(uintptr_t addr, uintptr_t size)
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
     if(end<idx) // memory addresses higher than 48bits are not tracked
         return;
-    pthread_mutex_lock(&mutex_prot);
+    mutex_lock(&mutex_prot);
     int ret;
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
         if(memprot[i].prot==memprot_default) {
@@ -955,7 +958,7 @@ void protectDB(uintptr_t addr, uintptr_t size)
         } else 
             memprot[i>>16].prot[i&0xffff] = prot|PROT_DYNAREC_R;
     }
-    pthread_mutex_unlock(&mutex_prot);
+    mutex_unlock(&mutex_prot);
 }
 
 // Add the Write flag from an adress range, and mark all block as dirty
@@ -969,7 +972,7 @@ void unprotectDB(uintptr_t addr, size_t size, int mark)
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
     if(end<idx) // memory addresses higher than 48bits are not tracked
         return;
-    pthread_mutex_lock(&mutex_prot);
+    mutex_lock(&mutex_prot);
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
         if(memprot[i].prot==memprot_default) {
             uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
@@ -989,7 +992,7 @@ void unprotectDB(uintptr_t addr, size_t size, int mark)
         } else if(prot&PROT_DYNAREC_R)
             memprot[i>>16].prot[i&0xffff] = prot&~PROT_CUSTOM;
     }
-    pthread_mutex_unlock(&mutex_prot);
+    mutex_unlock(&mutex_prot);
 }
 
 int isprotectedDB(uintptr_t addr, size_t size)
@@ -1113,14 +1116,12 @@ void removeMapMem(uintptr_t begin, uintptr_t end)
 void updateProtection(uintptr_t addr, size_t size, uint32_t prot)
 {
     dynarec_log(LOG_DEBUG, "updateProtection %p:%p 0x%x\n", (void*)addr, (void*)(addr+size-1), prot);
-    addMapMem(addr, addr+size-1);
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
     uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT);
     if(end>=(1LL<<(48-MEMPROT_SHIFT)))
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
-    if(end<idx) // memory addresses higher than 48bits are not tracked
-        return;
-    pthread_mutex_lock(&mutex_prot);
+    mutex_lock(&mutex_prot);
+    addMapMem(addr, addr+size-1);
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
         if(memprot[i].prot==memprot_default) {
             uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
@@ -1136,19 +1137,17 @@ void updateProtection(uintptr_t addr, size_t size, uint32_t prot)
         }
         memprot[i>>16].prot[i&0xffff] = prot|dyn;
     }
-    pthread_mutex_unlock(&mutex_prot);
+    mutex_unlock(&mutex_prot);
 }
 
 void setProtection(uintptr_t addr, size_t size, uint32_t prot)
 {
-    addMapMem(addr, addr+size-1);
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
     uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT);
     if(end>=(1LL<<(48-MEMPROT_SHIFT)))
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
-    if(end<idx) // memory addresses higher than 48bits are not tracked
-        return;
-    pthread_mutex_lock(&mutex_prot);
+    mutex_lock(&mutex_prot);
+    addMapMem(addr, addr+size-1);
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) {
         if(memprot[i].prot==memprot_default && prot) {
             uint8_t* newblock = box_calloc(MEMPROT_SIZE, sizeof(uint8_t));
@@ -1161,32 +1160,30 @@ void setProtection(uintptr_t addr, size_t size, uint32_t prot)
                 memprot[i].prot[j] = prot;
         }
     }
-    pthread_mutex_unlock(&mutex_prot);
+    mutex_unlock(&mutex_prot);
 }
 
 void refreshProtection(uintptr_t addr)
 {
-    pthread_mutex_lock(&mutex_prot);
+    mutex_lock(&mutex_prot);
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
     if(memprot[idx>>16].prot!=memprot_default) {
         int prot = memprot[idx>>16].prot[idx&0xffff];
         int ret = mprotect((void*)(idx<<MEMPROT_SHIFT), box64_pagesize, prot&~PROT_CUSTOM);
 printf_log(LOG_INFO, "refreshProtection(%p): %p/0x%x (ret=%d/%s)\n", (void*)addr, (void*)(idx<<MEMPROT_SHIFT), prot, ret, ret?strerror(errno):"ok");
     }
-    pthread_mutex_unlock(&mutex_prot);
+    mutex_unlock(&mutex_prot);
 }
 
 void allocProtection(uintptr_t addr, size_t size, uint32_t prot)
 {
     dynarec_log(LOG_DEBUG, "allocProtection %p:%p 0x%x\n", (void*)addr, (void*)(addr+size-1), prot);
-    addMapMem(addr, addr+size-1);
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
     uintptr_t end = ((addr+size-1LL)>>MEMPROT_SHIFT);
     if(end>=(1LL<<(48-MEMPROT_SHIFT)))
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
-    if(end<idx) // memory addresses higher than 48bits are not tracked
-        return;
-    pthread_mutex_lock(&mutex_prot);
+    mutex_lock(&mutex_prot);
+    addMapMem(addr, addr+size-1);
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
         if(memprot[i].prot==memprot_default) {
             uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
@@ -1202,7 +1199,7 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot)
         }
         i+=finish-start;    // +1 from the "for" loop
     }
-    pthread_mutex_unlock(&mutex_prot);
+    mutex_unlock(&mutex_prot);
 }
 
 #ifdef DYNAREC
@@ -1255,7 +1252,7 @@ void AddHotPage(uintptr_t addr) {
     int base = (addr>>MEMPROT_SHIFT)&0xffff;
     if(!memprot[idx].hot) {
             uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
-            if (native_lock_storeifnull(&memprot[idx].hot, newblock) != newblock) {
+            if (native_lock_storeifnull(&memprot[idx].hot, newblock)) {
                 box_free(newblock);
             }
     }
@@ -1297,14 +1294,12 @@ static int blockempty(uint8_t* mem)
 void freeProtection(uintptr_t addr, size_t size)
 {
     dynarec_log(LOG_DEBUG, "freeProtection %p:%p\n", (void*)addr, (void*)(addr+size-1));
-    removeMapMem(addr, addr+size-1);
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
     uintptr_t end = ((addr+size-1LL)>>MEMPROT_SHIFT);
     if(end>=(1LL<<(48-MEMPROT_SHIFT)))
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
-    if(end<idx) // memory addresses higher than 48bits are not tracked
-        return;
-    pthread_mutex_lock(&mutex_prot);
+    mutex_lock(&mutex_prot);
+    removeMapMem(addr, addr+size-1);
     for (uintptr_t i=idx; i<=end; ++i) {
         const uint32_t key = (i>>16);
         const uintptr_t start = i&(MEMPROT_SIZE-1);
@@ -1322,38 +1317,25 @@ void freeProtection(uintptr_t addr, size_t size)
             } else {
                 memset(block+start, 0, (finish-start+1)*sizeof(uint8_t));
                 // blockempty is quite slow, so disable the free of blocks for now
-#if 0 //def ARM64   //disabled for now, not useful with the mutex
-                if (blockempty(block)) {
-                    block = (void*)native_lock_xchg(&memprot[key], (uintptr_t)memprot_default);
-                    if(!blockempty(block)) {
-                        block = (void*)native_lock_xchg(&memprot[key], (uintptr_t)block);
-                        for (int i = 0; i < 0x10000; ++i) {
-                            memprot[key][i] |= block[i];
-                        }
-                    }
-                    if (block != memprot_default) box_free(block);
-                }
-#else
                 /*else if(blockempty(block)) {
                     memprot[key] = memprot_default;
                     box_free(block);
                 }*/
-#endif
             }
         }
         i+=finish-start;    // +1 from the "for" loop
     }
-    pthread_mutex_unlock(&mutex_prot);
+    mutex_unlock(&mutex_prot);
 }
 
 uint32_t getProtection(uintptr_t addr)
 {
     if(addr>=(1LL<<48))
         return 0;
-    pthread_mutex_lock(&mutex_prot);
+    mutex_lock(&mutex_prot);
     const uintptr_t idx = (addr>>MEMPROT_SHIFT);
     uint32_t ret = memprot[idx>>16].prot[idx&0xffff];
-    pthread_mutex_unlock(&mutex_prot);
+    mutex_unlock(&mutex_prot);
     return ret;
 }
 
@@ -1417,11 +1399,20 @@ int unlockCustommemMutex()
 {
     int ret = 0;
     int i = 0;
+    #ifdef DYNAREC
+    uint32_t tid = (uint32_t)GetTID();
+    #define GO(A, B)                    \
+        i = (native_lock_storeifref2_d(&A, 0, tid)==tid); \
+        if(i) {                         \
+            ret|=(1<<B);                \
+        }
+    #else
     #define GO(A, B)                    \
         i = checkUnlockMutex(&A);       \
         if(i) {                         \
             ret|=(1<<B);                \
         }
+    #endif
     GO(mutex_blocks, 0)
     GO(mutex_prot, 1)
     #undef GO
@@ -1432,7 +1423,7 @@ void relockCustommemMutex(int locks)
 {
     #define GO(A, B)                    \
         if(locks&(1<<B))                \
-            pthread_mutex_trylock(&A);  \
+            mutex_trylock(&A);          \
 
     GO(mutex_blocks, 0)
     GO(mutex_prot, 1)
@@ -1441,6 +1432,10 @@ void relockCustommemMutex(int locks)
 
 static void init_mutexes(void)
 {
+    #ifdef DYNAREC
+    native_lock_store(&mutex_blocks, 0);
+    native_lock_store(&mutex_prot, 0);
+    #else
     pthread_mutexattr_t attr;
     pthread_mutexattr_init(&attr);
     pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
@@ -1448,6 +1443,7 @@ static void init_mutexes(void)
     pthread_mutex_init(&mutex_prot, &attr);
 
     pthread_mutexattr_destroy(&attr);
+    #endif
 }
 
 static void atfork_child_custommem(void)
@@ -1561,8 +1557,10 @@ void fini_custommem_helper(box64context_t *ctx)
         box_free(p_blocks[i].block);
         #endif
     box_free(p_blocks);
+    #ifndef DYNAREC
     pthread_mutex_destroy(&mutex_prot);
     pthread_mutex_destroy(&mutex_blocks);
+    #endif
     while(mapmem) {
         mapmem_t *tmp = mapmem;
         mapmem = mapmem->next;
diff --git a/src/dynarec/arm64/arm64_lock.S b/src/dynarec/arm64/arm64_lock.S
index 209a666c..ce95dd89 100755
--- a/src/dynarec/arm64/arm64_lock.S
+++ b/src/dynarec/arm64/arm64_lock.S
@@ -18,7 +18,10 @@
 .global arm64_lock_xchg
 .global arm64_lock_xchg_d
 .global arm64_lock_storeifnull
+.global arm64_lock_storeifnull_d
 .global arm64_lock_storeifref
+.global arm64_lock_storeifref_d
+.global arm64_lock_storeifref2_d
 .global arm64_lock_decifnot0b
 .global arm64_lock_storeb
 .global arm64_lock_incif0
@@ -119,21 +122,33 @@ arm64_lock_xchg_d_0:
 arm64_lock_storeifnull:
     dmb     ish
 arm64_lock_storeifnull_0:
-    // address is x0, value is x1, x1 store to x0 only if [x0] is 0. return new [x0] value (so x1 or old value)
+    // address is x0, value is x1, x1 store to x0 only if [x0] is 0. return old [x0] value
     ldaxr   x2, [x0]
     cbnz    x2, arm64_lock_storeifnull_exit
-    mov     x2, x1
-    stlxr   w3, x2, [x0]
+    stlxr   w3, x1, [x0]
     cbnz    w3, arm64_lock_storeifnull_0
 arm64_lock_storeifnull_exit:
     dmb     ish
     mov     x0, x2
     ret
 
+arm64_lock_storeifnull_d:
+    dmb     ish
+arm64_lock_storeifnull_d_0:
+    // address is x0, value is w1, w1 store to x0 only if [x0] is 0. return old [x0] value
+    ldaxr   w2, [x0]
+    cbnz    w2, arm64_lock_storeifnull_d_exit
+    stlxr   w3, w1, [x0]
+    cbnz    w3, arm64_lock_storeifnull_d_0
+arm64_lock_storeifnull_d_exit:
+    dmb     ish
+    mov     w0, w2
+    ret
+
 arm64_lock_storeifref:
     dmb     ish
 arm64_lock_storeifref_0:
-    // address is x0, value is x1, x1 store to x0 only if [x0] is x3. return new [x0] value (so x1 or old value)
+    // address is x0, value is x1, x1 store to x0 only if [x0] is x2. return new [x0] value (so x1 or old value)
     ldaxr   x3, [x0]
     cmp     x2, x3
     bne     arm64_lock_storeifref_exit
@@ -146,6 +161,36 @@ arm64_lock_storeifref_exit:
     mov     x0, x3
     ret
 
+arm64_lock_storeifref_d:
+    dmb     ish
+arm64_lock_storeifref_d_0:
+    // address is x0, value is w1, w1 store to x0 only if [x0] is w2. return new [x0] value (so x1 or old value)
+    ldaxr   w3, [x0]
+    cmp     w2, w3
+    bne     arm64_lock_storeifref_d_exit
+    stlxr   w4, w1, [x0]
+    cbnz    w4, arm64_lock_storeifref_d_0
+    mov     w0, w1
+    ret
+arm64_lock_storeifref_d_exit:
+    dmb     ish
+    mov     w0, w3
+    ret
+
+arm64_lock_storeifref2_d:
+    dmb     ish
+arm64_lock_storeifref2_d_0:
+    // address is x0, value is w1, w1 store to x0 only if [x0] is w2. return old [x0] value
+    ldaxr   w3, [x0]
+    cmp     w2, w3
+    bne     arm64_lock_storeifref2_d_exit
+    stlxr   w4, w1, [x0]
+    cbnz    w4, arm64_lock_storeifref2_d_0
+arm64_lock_storeifref2_d_exit:
+    dmb     ish
+    mov     w0, w3
+    ret
+
 arm64_lock_decifnot0b:
     dmb     ish
 arm64_lock_decifnot0b_0:
diff --git a/src/dynarec/arm64/arm64_lock.h b/src/dynarec/arm64/arm64_lock.h
index 8f99321b..7e303da2 100755
--- a/src/dynarec/arm64/arm64_lock.h
+++ b/src/dynarec/arm64/arm64_lock.h
@@ -33,12 +33,21 @@ extern uintptr_t arm64_lock_xchg(void* p, uintptr_t val);
 // Atomicaly exchange value at [p] with val, return old p
 extern uint32_t arm64_lock_xchg_d(void* p, uint32_t val);
 
-// Atomicaly store value to [p] only if [p] is NULL. Return new [p] value (so val or old)
+// Atomicaly store value to [p] only if [p] is NULL. Return old [p] value
+extern uint32_t arm64_lock_storeifnull_d(void*p, uint32_t val);
+
+// Atomicaly store value to [p] only if [p] is NULL. Return old [p] value
 extern void* arm64_lock_storeifnull(void*p, void* val);
 
 // Atomicaly store value to [p] only if [p] is ref. Return new [p] value (so val or old)
 extern void* arm64_lock_storeifref(void*p, void* val, void* ref);
 
+// Atomicaly store value to [p] only if [p] is ref. Return new [p] value (so val or old)
+extern uint32_t arm64_lock_storeifref_d(void*p, uint32_t val, uint32_t ref);
+
+// Atomicaly store value to [p] only if [p] is ref. Return new [p] value (so val or old)
+extern uint32_t arm64_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref);
+
 // decrement atomicaly the byte at [p] (but only if p not 0)
 extern void arm64_lock_decifnot0b(void*p);
 
diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c
index 63b9e336..1ce5708b 100755
--- a/src/dynarec/dynablock.c
+++ b/src/dynarec/dynablock.c
@@ -46,7 +46,7 @@ void FreeDynablock(dynablock_t* db, int need_lock)
             return; // already in the process of deletion!
         dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size-1, db->gone);
         if(need_lock)
-            pthread_mutex_lock(&my_context->mutex_dyndump);
+            mutex_lock(&my_context->mutex_dyndump);
         // remove jumptable
         setJumpTableDefault64(db->x64_addr);
         dynarec_log(LOG_DEBUG, " -- FreeDyrecMap(%p, %d)\n", db->actual_block, db->size);
@@ -55,7 +55,7 @@ void FreeDynablock(dynablock_t* db, int need_lock)
         FreeDynarecMap(db, (uintptr_t)db->actual_block, db->size);
         customFree(db);
         if(need_lock)
-            pthread_mutex_unlock(&my_context->mutex_dyndump);
+            mutex_unlock(&my_context->mutex_dyndump);
     }
 }
 
@@ -163,12 +163,12 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t
         return block;
 
     if(need_lock)
-        pthread_mutex_lock(&my_context->mutex_dyndump);
+        mutex_lock(&my_context->mutex_dyndump);
     
     block = getDB(addr);    // just in case
     if(block) {
         if(need_lock)
-            pthread_mutex_unlock(&my_context->mutex_dyndump);
+            mutex_unlock(&my_context->mutex_dyndump);
         return block;
     }
     
@@ -179,7 +179,7 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t
     if(sigsetjmp(&dynarec_jmpbuf, 1)) {
         printf_log(LOG_INFO, "FillBlock at %p triggered a segfault, cancelling\n", (void*)addr);
         if(need_lock)
-            pthread_mutex_unlock(&my_context->mutex_dyndump);
+            mutex_unlock(&my_context->mutex_dyndump);
         return NULL;
     }
     void* ret = FillBlock64(block, filladdr);
@@ -203,7 +203,7 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t
         }
     }
     if(need_lock)
-        pthread_mutex_unlock(&my_context->mutex_dyndump);
+        mutex_unlock(&my_context->mutex_dyndump);
 
     dynarec_log(LOG_DEBUG, "%04d| --- DynaRec Block created @%p:%p (%p, 0x%x bytes)\n", GetTID(), (void*)addr, (void*)(addr+((block)?block->x64_size:1)-1), (block)?block->block:0, (block)?block->size:0);
 
@@ -219,7 +219,7 @@ dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create)
             return NULL;
         }
         uint32_t hash = X31_hash_code(db->x64_addr, db->x64_size);
-        if(pthread_mutex_trylock(&my_context->mutex_dyndump)) {
+        if(mutex_trylock(&my_context->mutex_dyndump)) {
             dynarec_log(LOG_DEBUG, "mutex_dyndump not available when trying to validate block %p from %p:%p (hash:%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, db->hash, (void*)addr);
             return NULL;
         }
@@ -237,7 +237,7 @@ dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create)
             // fill back jumptable
             setJumpTableIfRef64(db->x64_addr, db->block, db->jmpnext);
         }
-        pthread_mutex_unlock(&my_context->mutex_dyndump);
+        mutex_unlock(&my_context->mutex_dyndump);
     } 
     return db;
 }
@@ -248,7 +248,7 @@ dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr)
     int create = 1;
     dynablock_t *db = internalDBGetBlock(emu, addr, filladdr, create, 1);
     if(db && db->done && db->block && db->need_test) {
-        if(pthread_mutex_trylock(&my_context->mutex_dyndump))
+        if(mutex_trylock(&my_context->mutex_dyndump))
             return NULL;
         uint32_t hash = X31_hash_code(db->x64_addr, db->x64_size);
         if(hash!=db->hash) {
@@ -264,7 +264,7 @@ dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr)
             // fill back jumptable
             addJumpTableIfDefault64(db->x64_addr, db->block);
         }
-        pthread_mutex_unlock(&my_context->mutex_dyndump);
+        mutex_unlock(&my_context->mutex_dyndump);
     } 
     return db;
 }
diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index ad496068..506eaf0f 100755
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -376,7 +376,7 @@ static int updateNeed(dynarec_arm_t* dyn, int ninst, uint8_t need) {
     return ninst;
 }
 
-__thread void* current_helper = NULL;
+void* current_helper = NULL;
 
 void CancelBlock64()
 {
@@ -433,6 +433,10 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) {
         dynarec_log(LOG_INFO, "Create empty block in no-dynarec zone\n");
         return CreateEmptyBlock(block, addr);
     }
+    if(current_helper) {
+        dynarec_log(LOG_DEBUG, "Cancelling dynarec FillBlock at %p as another one is going on\n", (void*)addr);
+        return NULL;
+    }
     // protect the 1st page
     protectDB(addr, 1);
     // init the helper
diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h
index 09917f37..32cf56fa 100755
--- a/src/dynarec/native_lock.h
+++ b/src/dynarec/native_lock.h
@@ -17,7 +17,10 @@
 #define native_lock_xchg(A, B)              arm64_lock_xchg(A, B)

 #define native_lock_xchg_d(A, B)            arm64_lock_xchg_d(A, B)

 #define native_lock_storeifref(A, B, C)     arm64_lock_storeifref(A, B, C)

+#define native_lock_storeifref_d(A, B, C)   arm64_lock_storeifref_d(A, B, C)

+#define native_lock_storeifref2_d(A, B, C)  arm64_lock_storeifref2_d(A, B, C)

 #define native_lock_storeifnull(A, B)       arm64_lock_storeifnull(A, B)

+#define native_lock_storeifnull_d(A, B)     arm64_lock_storeifnull_d(A, B)

 #define native_lock_decifnot0b(A)           arm64_lock_decifnot0b(A)

 #define native_lock_storeb(A, B)            arm64_lock_storeb(A, B)

 #define native_lock_incif0(A)               arm64_lock_incif0(A)

diff --git a/src/emu/x64int3.c b/src/emu/x64int3.c
index d0d71c89..49cc0007 100755
--- a/src/emu/x64int3.c
+++ b/src/emu/x64int3.c
@@ -101,7 +101,6 @@ void x64Int3(x64emu_t* emu, uintptr_t* addr)
             int have_trace = 0;
             if(h && strstr(ElfName(h), "libMiles")) have_trace = 1;*/
             if(box64_log>=LOG_DEBUG || cycle_log) {
-                pthread_mutex_lock(&emu->context->mutex_trace);
                 int tid = GetTID();
                 char t_buff[256] = "\0";
                 char buff2[64] = "\0";
@@ -254,10 +253,12 @@ void x64Int3(x64emu_t* emu, uintptr_t* addr)
                 } else {
                     snprintf(buff, 256, "%04d|%p: Calling %s(0x%lX, 0x%lX, 0x%lX, ...)", tid, *(void**)(R_RSP), s, R_RDI, R_RSI, R_RDX);
                 }
-                if(!cycle_log) printf_log(LOG_NONE, "%s =>", buff);
-                pthread_mutex_unlock(&emu->context->mutex_trace);
+                if(!cycle_log) {
+                    mutex_lock(&emu->context->mutex_trace);
+                    printf_log(LOG_NONE, "%s =>", buff);
+                    mutex_unlock(&emu->context->mutex_trace);
+                }
                 w(emu, a);   // some function never come back, so unlock the mutex first!
-                pthread_mutex_lock(&emu->context->mutex_trace);
                 if(post)
                     switch(post) { // Only ever 2 for now...
                     case 1: snprintf(buff2, 64, " [%llu sec %llu nsec]", pu64?pu64[0]:~0ull, pu64?pu64[1]:~0ull);
@@ -285,9 +286,11 @@ void x64Int3(x64emu_t* emu, uintptr_t* addr)
                     snprintf(buff3, 64, " (errno=%d:\"%s\")", errno, strerror(errno));
                 if(cycle_log)
                     snprintf(buffret, 128, "0x%lX%s%s", R_RAX, buff2, buff3);
-                else
+                else {
+                    mutex_lock(&emu->context->mutex_trace);
                     printf_log(LOG_NONE, " return 0x%lX%s%s\n", R_RAX, buff2, buff3);
-                pthread_mutex_unlock(&emu->context->mutex_trace);
+                    mutex_unlock(&emu->context->mutex_trace);
+                }
             } else
                 w(emu, a);
         }
diff --git a/src/emu/x64run_private.c b/src/emu/x64run_private.c
index 7929e2fc..a78ca678 100755
--- a/src/emu/x64run_private.c
+++ b/src/emu/x64run_private.c
@@ -1033,7 +1033,7 @@ void PrintTrace(x64emu_t* emu, uintptr_t ip, int dynarec)
             (trace_end == 0) 
             || ((ip >= trace_start) && (ip < trace_end))) ) {
         int tid = syscall(SYS_gettid);
-        pthread_mutex_lock(&my_context->mutex_trace);
+        mutex_lock(&my_context->mutex_trace);
 #ifdef DYNAREC
         if((my_context->trace_tid != tid) || (my_context->trace_dynarec!=dynarec)) {
             printf_log(LOG_NONE, "Thread %04d| (%s) |\n", tid, dynarec?"dyn":"int");
@@ -1050,7 +1050,7 @@ void PrintTrace(x64emu_t* emu, uintptr_t ip, int dynarec)
         printf_log(LOG_NONE, "%s", DumpCPURegs(emu, ip));
         if(R_RIP==0) {
             printf_log(LOG_NONE, "Running at NULL address\n");
-            pthread_mutex_unlock(&my_context->mutex_trace);
+            mutex_unlock(&my_context->mutex_trace);
             return;
         }
         if(PK(0)==0xcc && PK(1)=='S' && PK(2)=='C') {
@@ -1090,7 +1090,7 @@ void PrintTrace(x64emu_t* emu, uintptr_t ip, int dynarec)
             }
             printf_log(LOG_NONE, "\n");
         }
-        pthread_mutex_unlock(&my_context->mutex_trace);
+        mutex_unlock(&my_context->mutex_trace);
     }
 }
 
diff --git a/src/emu/x64tls.c b/src/emu/x64tls.c
index ecb0ce7a..a059ecfb 100755
--- a/src/emu/x64tls.c
+++ b/src/emu/x64tls.c
@@ -229,15 +229,15 @@ static tlsdatasize_t* setupTLSData(box64context_t* context)
 
 void* fillTLSData(box64context_t *context)
 {
-        pthread_mutex_lock(&context->mutex_tls);
+        mutex_lock(&context->mutex_tls);
         tlsdatasize_t *data = setupTLSData(context);
-        pthread_mutex_unlock(&context->mutex_tls);
+        mutex_unlock(&context->mutex_tls);
         return data;
 }
 
 void* resizeTLSData(box64context_t *context, void* oldptr)
 {
-        pthread_mutex_lock(&context->mutex_tls);
+        mutex_lock(&context->mutex_tls);
         tlsdatasize_t* oldata = (tlsdatasize_t*)oldptr;
         if(sizeTLSData(oldata->tlssize)!=sizeTLSData(context->tlssize) || (oldata->n_elfs/0xff)!=(context->elfsize/0xff)) {
             printf_log(LOG_INFO, "Warning, resizing of TLS occured! size: %d->%d / n_elfs: %d->%d\n", sizeTLSData(oldata->tlssize), sizeTLSData(context->tlssize), 1+(oldata->n_elfs/0xff), 1+(context->elfsize/0xff));
@@ -245,7 +245,7 @@ void* resizeTLSData(box64context_t *context, void* oldptr)
             // copy the relevent old part, in case something changed
             memcpy((void*)((uintptr_t)data->data-oldata->tlssize), (void*)((uintptr_t)oldata->data-oldata->tlssize), oldata->tlssize);
             // all done, update new size, free old pointer and exit
-            pthread_mutex_unlock(&context->mutex_tls);
+            mutex_unlock(&context->mutex_tls);
             free_tlsdatasize(oldptr);
             return data;
         } else {
@@ -267,7 +267,7 @@ void* resizeTLSData(box64context_t *context, void* oldptr)
                 }
                 oldata->n_elfs = context->elfsize;
             }
-            pthread_mutex_unlock(&context->mutex_tls);
+            mutex_unlock(&context->mutex_tls);
             return oldata;
         }
 }
diff --git a/src/include/box64context.h b/src/include/box64context.h
index 3d8e0061..c8360017 100755
--- a/src/include/box64context.h
+++ b/src/include/box64context.h
@@ -4,6 +4,9 @@
 #include <pthread.h>
 #include "pathcoll.h"
 #include "dictionnary.h"
+#ifdef DYNAREC
+#include "dynarec/native_lock.h"
+#endif
 
 typedef struct elfheader_s elfheader_t;
 typedef struct cleanup_s cleanup_t;
@@ -123,17 +126,21 @@ typedef struct box64context_s {
     kh_defaultversion_t *weakdefver;    // the weak default version for symbols (the XXX@@vvvv of symbols)
     vkprocaddess_t      vkprocaddress;
 
-    pthread_mutex_t     mutex_trace;
     #ifndef DYNAREC
     pthread_mutex_t     mutex_lock;     // dynarec build will use their own mecanism
+    pthread_mutex_t     mutex_trace;
+    pthread_mutex_t     mutex_tls;
+    pthread_mutex_t     mutex_thread;
+    pthread_mutex_t     mutex_bridge;
     #else
-    pthread_mutex_t     mutex_dyndump;
+    uint32_t            mutex_dyndump;
+    uint32_t            mutex_trace;
+    uint32_t            mutex_tls;
+    uint32_t            mutex_thread;
+    uint32_t            mutex_bridge;
     uintptr_t           max_db_size;    // the biggest (in x86_64 instructions bytes) built dynablock
     int                 trace_dynarec;
     #endif
-    pthread_mutex_t     mutex_tls;
-    pthread_mutex_t     mutex_thread;
-    pthread_mutex_t     mutex_bridge;
 
     library_t           *libclib;       // shortcut to libc library (if loaded, so probably yes)
     library_t           *sdl1mixerlib;
@@ -188,6 +195,17 @@ typedef struct box64context_s {
 
 } box64context_t;
 
+#ifndef DYNAREC
+#define mutex_lock(A)       pthread_mutex_lock(A)
+#define mutex_trylock(A)    pthread_mutex_trylock(A)
+#define mutex_unlock(A)     pthread_mutex_unlock(A)
+#else
+int GetTID();
+#define mutex_lock(A)       {uint32_t tid = (uint32_t)GetTID(); while(native_lock_storeifnull_d(A, tid)) sched_yield();}
+#define mutex_trylock(A)    native_lock_storeifnull_d(A, (uint32_t)GetTID())
+#define mutex_unlock(A)     native_lock_storeifref_d(A, 0, (uint32_t)GetTID())
+#endif
+
 extern box64context_t *my_context; // global context
 
 box64context_t *NewBox64Context(int argc);
diff --git a/src/include/threads.h b/src/include/threads.h
index 847d4ee6..d094dc0b 100755
--- a/src/include/threads.h
+++ b/src/include/threads.h
@@ -20,7 +20,9 @@ void fini_pthread_helper(box64context_t* context);
 // prepare an "emuthread structure" in pet and return address of function pointer for a "thread creation routine"
 void* my_prepare_thread(x64emu_t *emu, void* f, void* arg, int ssize, void** pet);
 
+#ifndef DYNAREC
 //check and unlock if a mutex is locked by current thread (works only for PTHREAD_MUTEX_ERRORCHECK typed mutex)
 int checkUnlockMutex(void* m);
+#endif
 
 #endif //_THREADS_H_
\ No newline at end of file
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index f2121f73..e9fc475f 100755
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -31,6 +31,7 @@
 #ifdef DYNAREC
 #include "dynablock.h"
 #include "../dynarec/dynablock_private.h"
+#include "dynarec_native.h"
 #endif
 
 
@@ -266,6 +267,8 @@ static void sigstack_key_alloc() {
 	pthread_key_create(&sigstack_key, sigstack_destroy);
 }
 
+//1<<8 is mutex_dyndump
+#define is_dyndump_locked (1<<8)
 uint64_t RunFunctionHandler(int* exit, x64_ucontext_t* sigcontext, uintptr_t fnc, int nargs, ...)
 {
     if(fnc==0 || fnc==1) {
@@ -698,7 +701,9 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
                 *old_code = -1;    // re-init the value to allow another segfault at the same place
             if(used_stack)  // release stack
                 new_ss->ss_flags = 0;
-            relockMutex(Locks);
+            //relockMutex(Locks);   // do not relock mutex, because of the siglongjmp, whatever was running is canceled
+            if(Locks & is_dyndump_locked)
+                CancelBlock64();
             siglongjmp(ejb->jmpbuf, 1);
         }
         printf_log(LOG_INFO, "Warning, context has been changed in Sigactionhanlder%s\n", (sigcontext->uc_mcontext.gregs[X64_RIP]!=sigcontext_copy.uc_mcontext.gregs[X64_RIP])?" (EIP changed)":"");
@@ -738,7 +743,9 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
 
     printf_log(LOG_DEBUG, "Sigactionhanlder main function returned (exit=%d, restorer=%p)\n", exits, (void*)restorer);
     if(exits) {
-        relockMutex(Locks);
+        //relockMutex(Locks);   // the thread will exit, so no relock there
+        if(Locks & is_dyndump_locked)
+            CancelBlock64();
         exit(ret);
     }
     if(restorer)
@@ -746,9 +753,9 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
     relockMutex(Locks);
 }
 
-extern __thread void* current_helper;
+extern void* current_helper;
 #ifdef DYNAREC
-static pthread_mutex_t mutex_dynarec_prot;
+static uint32_t mutex_dynarec_prot = 0;
 #endif
 
 extern int box64_quit;
@@ -792,14 +799,14 @@ void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx)
     int Locks = unlockMutex();
     uint32_t prot = getProtection((uintptr_t)addr);
 #ifdef DYNAREC
-    if((Locks & (1<<8)) && (sig==SIGSEGV) && current_helper) {//1<<8 is mutex_dyndump
+    if((Locks & is_dyndump_locked) && (sig==SIGSEGV) && current_helper) {
         relockMutex(Locks);
         cancelFillBlock();  // Segfault inside a Fillblock, cancel it's creation...
     }
     dynablock_t* db = NULL;
     int db_searched = 0;
     if ((sig==SIGSEGV) && (addr) && (info->si_code == SEGV_ACCERR) && (prot&PROT_CUSTOM)) {
-        pthread_mutex_lock(&mutex_dynarec_prot);
+        mutex_lock(&mutex_dynarec_prot);
         // check if SMC inside block
         db = FindDynablockFromNativeAddress(pc);
         db_searched = 1;
@@ -878,22 +885,24 @@ void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx)
                 } else {
                     dynarec_log(LOG_INFO, "Dynablock unprotected, getting out!\n");
                 }
-                relockMutex(Locks);
-                pthread_mutex_unlock(&mutex_dynarec_prot);
+                //relockMutex(Locks);
+                if(Locks & is_dyndump_locked)
+                    CancelBlock64();
+                mutex_unlock(&mutex_dynarec_prot);
                 siglongjmp(ejb->jmpbuf, 2);
             }
             dynarec_log(LOG_INFO, "Warning, Auto-SMC (%p for db %p/%p) detected, but jmpbuffer not ready!\n", (void*)addr, db, (void*)db->x64_addr);
         }
         // done
         if((prot&PROT_WRITE) || (prot&PROT_DYNAREC)) {
-            pthread_mutex_unlock(&mutex_dynarec_prot);
+            mutex_unlock(&mutex_dynarec_prot);
             // if there is no write permission, don't return and continue to program signal handling
             relockMutex(Locks);
             return;
         }
-        pthread_mutex_unlock(&mutex_dynarec_prot);
+        mutex_unlock(&mutex_dynarec_prot);
     } else if ((sig==SIGSEGV) && (addr) && (info->si_code == SEGV_ACCERR) && ((prot&(PROT_READ|PROT_WRITE))==(PROT_READ|PROT_WRITE))) {
-        pthread_mutex_lock(&mutex_dynarec_prot);
+        mutex_lock(&mutex_dynarec_prot);
         db = FindDynablockFromNativeAddress(pc);
         db_searched = 1;
         if(db && db->x64_addr>= addr && (db->x64_addr+db->x64_size)<addr) {
@@ -910,7 +919,7 @@ void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx)
                 glitch_addr = addr;
                 glitch_prot = prot;
                 relockMutex(Locks);
-                pthread_mutex_unlock(&mutex_dynarec_prot);
+                mutex_unlock(&mutex_dynarec_prot);
                 return; // try again
             }
 dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for %p, db=%p, prot=0x%x\n", pc, addr, db, prot);
@@ -932,14 +941,14 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for
                 refreshProtection((uintptr_t)addr);
                 relockMutex(Locks);
                 sched_yield();  // give time to the other process
-                pthread_mutex_unlock(&mutex_dynarec_prot);
+                mutex_unlock(&mutex_dynarec_prot);
                 return; // try again
             }
             glitch2_pc = NULL;
             glitch2_addr = NULL;
             glitch2_prot = 0;
         }
-        pthread_mutex_unlock(&mutex_dynarec_prot);
+        mutex_unlock(&mutex_dynarec_prot);
     }
     if(!db_searched)
         db = FindDynablockFromNativeAddress(pc);
@@ -1482,12 +1491,7 @@ EXPORT int my_swapcontext(x64emu_t* emu, void* ucp1, void* ucp2)
 #ifdef DYNAREC
 static void atfork_child_dynarec_prot(void)
 {
-    pthread_mutexattr_t attr;
-    pthread_mutexattr_init(&attr);
-    pthread_mutexattr_settype(&attr, PTHREAD_MUTEX_ERRORCHECK);
-    pthread_mutex_init(&mutex_dynarec_prot, &attr);
-
-    pthread_mutexattr_destroy(&attr);
+    native_lock_store(&mutex_dynarec_prot, 0);
 }
 #endif
 void init_signal_helper(box64context_t* context)
diff --git a/src/libtools/threads.c b/src/libtools/threads.c
index 7e70b160..0877335b 100755
--- a/src/libtools/threads.c
+++ b/src/libtools/threads.c
@@ -80,50 +80,50 @@ void CleanStackSize(box64context_t* context)
 	threadstack_t *ts;
 	if(!context || !context->stacksizes)
 		return;
-	pthread_mutex_lock(&context->mutex_thread);
+	mutex_lock(&context->mutex_thread);
 	kh_foreach_value(context->stacksizes, ts, box_free(ts));
 	kh_destroy(threadstack, context->stacksizes);
 	context->stacksizes = NULL;
-	pthread_mutex_unlock(&context->mutex_thread);
+	mutex_unlock(&context->mutex_thread);
 }
 
 void FreeStackSize(kh_threadstack_t* map, uintptr_t attr)
 {
-	pthread_mutex_lock(&my_context->mutex_thread);
+	mutex_lock(&my_context->mutex_thread);
 	khint_t k = kh_get(threadstack, map, attr);
 	if(k!=kh_end(map)) {
 		box_free(kh_value(map, k));
 		kh_del(threadstack, map, k);
 	}
-	pthread_mutex_unlock(&my_context->mutex_thread);
+	mutex_unlock(&my_context->mutex_thread);
 }
 
 void AddStackSize(kh_threadstack_t* map, uintptr_t attr, void* stack, size_t stacksize)
 {
 	khint_t k;
 	int ret;
-	pthread_mutex_lock(&my_context->mutex_thread);
+	mutex_lock(&my_context->mutex_thread);
 	k = kh_put(threadstack, map, attr, &ret);
 	threadstack_t* ts = kh_value(map, k) = (threadstack_t*)box_calloc(1, sizeof(threadstack_t));
 	ts->stack = stack;
 	ts->stacksize = stacksize;
-	pthread_mutex_unlock(&my_context->mutex_thread);
+	mutex_unlock(&my_context->mutex_thread);
 }
 
 // return stack from attr (or from current emu if attr is not found..., wich is wrong but approximate enough?)
 int GetStackSize(x64emu_t* emu, uintptr_t attr, void** stack, size_t* stacksize)
 {
 	if(emu->context->stacksizes && attr) {
-		pthread_mutex_lock(&my_context->mutex_thread);
+		mutex_lock(&my_context->mutex_thread);
 		khint_t k = kh_get(threadstack, emu->context->stacksizes, attr);
 		if(k!=kh_end(emu->context->stacksizes)) {
 			threadstack_t* ts = kh_value(emu->context->stacksizes, k);
 			*stack = ts->stack;
 			*stacksize = ts->stacksize;
-			pthread_mutex_unlock(&my_context->mutex_thread);
+			mutex_unlock(&my_context->mutex_thread);
 			return 1;
 		}
-		pthread_mutex_unlock(&my_context->mutex_thread);
+		mutex_unlock(&my_context->mutex_thread);
 	}
 	// should a Warning be emited?
 	*stack = emu->init_stack;
@@ -680,10 +680,10 @@ int EXPORT my_pthread_once(x64emu_t* emu, int* once, void* cb)
 	int old = native_lock_xchg_d(once, 1);
 	#else
 	int old = *once;	// outside of the mutex in case once is badly formed
-	pthread_mutex_lock(&my_context->mutex_lock);
+	mutex_lock(&my_context->mutex_lock);
 	old = *once;
 	*once = 1;
-	pthread_mutex_unlock(&my_context->mutex_lock);
+	mutex_unlock(&my_context->mutex_lock);
 	#endif
 	if(old)
 		return 0;
@@ -837,7 +837,11 @@ typedef struct mutexes_block_s {
 } mutexes_block_t;
 
 static mutexes_block_t *mutexes = NULL;
+#ifdef DYNAREC
+static uint32_t mutex_mutexes = 0;
+#else
 static pthread_mutex_t mutex_mutexes = PTHREAD_MUTEX_INITIALIZER;
+#endif
 
 static mutexes_block_t* NewMutexesBlock()
 {
@@ -847,7 +851,7 @@ static mutexes_block_t* NewMutexesBlock()
 }
 
 static int NewMutex() {
-	pthread_mutex_lock(&mutex_mutexes);
+	mutex_lock(&mutex_mutexes);
 	if(!mutexes) {
 		mutexes = NewMutexesBlock();
 	}
@@ -864,10 +868,10 @@ static int NewMutex() {
 	for (int i=0; i<MUTEXES_SIZE; ++i)
 		if(!m->taken[i]) {
 			m->taken[i] = 1;
-			pthread_mutex_unlock(&mutex_mutexes);
+			mutex_unlock(&mutex_mutexes);
 			return j*MUTEXES_SIZE + i;
 		}
-	pthread_mutex_unlock(&mutex_mutexes);
+	mutex_unlock(&mutex_mutexes);
 	printf_log(LOG_NONE, "Error: NewMutex unreachable part reached\n");
 	return (int)-1;	// error!!!!
 }
@@ -876,13 +880,13 @@ void FreeMutex(int k)
 {
 	if(!mutexes)
 		return;	//???
-	pthread_mutex_lock(&mutex_mutexes);
+	mutex_lock(&mutex_mutexes);
 	mutexes_block_t* m = mutexes;
 	for(int i=0; i<k/MUTEXES_SIZE; ++i)
 		m = m->next;
 	m->taken[k%MUTEXES_SIZE] = 0;
 	++m->n_free;
-	pthread_mutex_unlock(&mutex_mutexes);
+	mutex_unlock(&mutex_mutexes);
 }
 
 void FreeAllMutexes(mutexes_block_t* m)
@@ -1307,6 +1311,7 @@ void fini_pthread_helper(box64context_t* context)
 	}
 }
 
+#ifndef DYNAREC
 int checkUnlockMutex(void* m)
 {
 	pthread_mutex_t* mutex = (pthread_mutex_t*)m;
@@ -1316,3 +1321,4 @@ int checkUnlockMutex(void* m)
 	}
 	return 0;
 }
+#endif
\ No newline at end of file
diff --git a/src/tools/bridge.c b/src/tools/bridge.c
index 61960898..514ce989 100755
--- a/src/tools/bridge.c
+++ b/src/tools/bridge.c
@@ -105,7 +105,7 @@ uintptr_t AddBridge(bridge_t* bridge, wrapper_t w, void* fnc, int N, const char*
     int sz = -1;
     int ret;
 
-    pthread_mutex_lock(&my_context->mutex_bridge);
+    mutex_lock(&my_context->mutex_bridge);
     b = bridge->last;
     if(b->sz == NBRICK) {
         b->next = NewBrick(b->b);
@@ -117,7 +117,7 @@ uintptr_t AddBridge(bridge_t* bridge, wrapper_t w, void* fnc, int N, const char*
     // add bridge to map, for fast recovery
     khint_t k = kh_put(bridgemap, bridge->bridgemap, (uintptr_t)fnc, &ret);
     kh_value(bridge->bridgemap, k) = (uintptr_t)&b->b[sz].CC;
-    pthread_mutex_unlock(&my_context->mutex_bridge);
+    mutex_unlock(&my_context->mutex_bridge);
 
     #ifdef DYNAREC
     int prot = 0;
@@ -234,7 +234,7 @@ uintptr_t AddVSyscall(bridge_t* bridge, int num)
     int prot = 0;
     do {
         #endif
-        pthread_mutex_lock(&my_context->mutex_bridge);
+        mutex_lock(&my_context->mutex_bridge);
         b = bridge->last;
         if(b->sz == NBRICK) {
             b->next = NewBrick(b->b);
@@ -243,7 +243,7 @@ uintptr_t AddVSyscall(bridge_t* bridge, int num)
         }
 	    sz = b->sz;
         #ifdef DYNAREC
-        pthread_mutex_unlock(&my_context->mutex_bridge);
+        mutex_unlock(&my_context->mutex_bridge);
         if(box64_dynarec) {
             prot=(getProtection((uintptr_t)b->b)&(PROT_DYNAREC|PROT_DYNAREC_R))?1:0;
             if(prot)
@@ -252,7 +252,7 @@ uintptr_t AddVSyscall(bridge_t* bridge, int num)
                 addDBFromAddressRange((uintptr_t)&b->b[b->sz].CC, sizeof(onebridge_t));
         }
     } while(sz!=b->sz); // this while loop if someone took the slot when the bridge mutex was unlocked doing memory protection managment
-    pthread_mutex_lock(&my_context->mutex_bridge);
+    mutex_lock(&my_context->mutex_bridge);
     #endif
     b->sz++;
     b->b[sz].B8 = 0xB8;
@@ -260,7 +260,7 @@ uintptr_t AddVSyscall(bridge_t* bridge, int num)
     b->b[sz]._0F = 0x0F;
     b->b[sz]._05 = 0x05;
     b->b[sz]._C3 = 0xC3;
-    pthread_mutex_unlock(&my_context->mutex_bridge);
+    mutex_unlock(&my_context->mutex_bridge);
     #ifdef DYNAREC
     if(box64_dynarec)
         protectDB((uintptr_t)b->b, NBRICK*sizeof(onebridge_t));