about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2021-11-05 21:17:14 +0100
committerptitSeb <sebastien.chev@gmail.com>2021-11-05 21:17:14 +0100
commitf73e4193f032713529e26c4537e5fac44147c040 (patch)
tree6aa77ae5b0cb2020a7da473d43d2bf0fc4b90efc /src
parent841003bc472bbfc947e5f89f5eddae341ba7e609 (diff)
downloadbox64-f73e4193f032713529e26c4537e5fac44147c040.tar.gz
box64-f73e4193f032713529e26c4537e5fac44147c040.zip
Changed memory protection tracking to reduce usage of lock, especially for [DYNAREC] reduced mutex and chances of deadlock (help RimWorld)
Diffstat (limited to 'src')
-rw-r--r--src/custommem.c316
-rwxr-xr-xsrc/dynarec/dynablock.c12
-rwxr-xr-xsrc/dynarec/dynarec_arm64.c12
-rw-r--r--src/include/custommem.h4
-rwxr-xr-xsrc/libtools/signals.c4
5 files changed, 177 insertions, 171 deletions
diff --git a/src/custommem.c b/src/custommem.c
index 171855e4..54864d90 100644
--- a/src/custommem.c
+++ b/src/custommem.c
@@ -41,12 +41,12 @@ static uintptr_t**         box64_jmptbldefault2[1<<JMPTABL_SHIFT];
 static uintptr_t*          box64_jmptbldefault1[1<<JMPTABL_SHIFT];
 static uintptr_t           box64_jmptbldefault0[1<<JMPTABL_SHIFT];
 #endif
-#define MEMPROT_SHIFT 12
-#define MEMPROT_SHIFT2 (32-MEMPROT_SHIFT)
-#define MEMPROT_SIZE (1<<(32-MEMPROT_SHIFT))
 static pthread_mutex_t     mutex_prot;
-KHASH_MAP_INIT_INT(memprot, uint8_t*)
-static kh_memprot_t        *memprot;
+#define MEMPROT_SHIFT 12
+#define MEMPROT_SHIFT2 (16+12)
+#define MEMPROT_SIZE (1<<16)
+static uint8_t *memprot[1<<20];    // x86_64 mem is 48bits, page is 12bits, so memory is tracked as [20][16][page protection]
+static uint8_t memprot_default[MEMPROT_SIZE];
 static int inited = 0;
 
 typedef struct blocklist_s {
@@ -760,44 +760,33 @@ uintptr_t getJumpTableAddress64(uintptr_t addr)
 }
 
 // Remove the Write flag from an adress range, so DB can be executed safely
-void protectDBnolock(uintptr_t addr, uintptr_t size)
+void protectDB(uintptr_t addr, uintptr_t size)
 {
     dynarec_log(LOG_DEBUG, "protectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1));
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
     uintptr_t end = ((addr+size-1LL)>>MEMPROT_SHIFT);
+    if(end>=(1LL<<(20+16)))
+        end = (1LL<<(20+16))-1;
+    if(end<idx) // memory addresses higher than 48bits are not tracked
+        return;
+    pthread_mutex_lock(&mutex_prot);
     int ret;
-    for (uintptr_t i=idx; i<=end; ++i) {
-        const uint32_t key = (i>>MEMPROT_SHIFT2)&0xffffffff;
-        khint_t k = kh_put(memprot, memprot, key, &ret);
-        if(ret) {
-            uint8_t *m = (uint8_t*)calloc(1, MEMPROT_SIZE);
-            kh_value(memprot, k) = m;
+    for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
+        if(memprot[i]==memprot_default) {
+            uint8_t* newblock = calloc(1<<16, sizeof(uint8_t));
+            if (arm64_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) {
+                free(newblock);
+            }
         }
-        const uintptr_t ii = i&(MEMPROT_SIZE-1);
-        uint8_t prot = kh_value(memprot, k)[ii];
+    for (uintptr_t i=idx; i<=end; ++i) {
+        uint32_t prot = memprot[i>>16][i&0xffff];
         if(!(prot&PROT_DYNAREC)) {
             if(!prot)
                 prot = PROT_READ | PROT_WRITE;    // comes from malloc & co, so should not be able to execute
-            kh_value(memprot, k)[ii] = prot|PROT_DYNAREC;
-            mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~(PROT_WRITE|PROT_CUSTOM));
+            memprot[i>>16][i&0xffff] = prot|PROT_DYNAREC;   // need to use atomic exchange?
+            mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE);
         }
     }
-}
-
-void protectDB(uintptr_t addr, size_t size)
-{
-    pthread_mutex_lock(&mutex_prot);
-    protectDBnolock(addr, size);
-    pthread_mutex_unlock(&mutex_prot);
-}
-
-void lockDB()
-{
-    pthread_mutex_lock(&mutex_prot);
-}
-
-void unlockDB()
-{
     pthread_mutex_unlock(&mutex_prot);
 }
 
@@ -807,90 +796,102 @@ void unprotectDB(uintptr_t addr, size_t size)
 {
     dynarec_log(LOG_DEBUG, "unprotectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1));
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
-    uintptr_t end = ((addr+size-1LL)>>MEMPROT_SHIFT);
-    int ret;
+    uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT);
+    if(end>=(1LL<<(20+16)))
+        end = (1LL<<(20+16))-1;
+    if(end<idx) // memory addresses higher than 48bits are not tracked
+        return;
     pthread_mutex_lock(&mutex_prot);
-    for (uintptr_t i=idx; i<=end; ++i) {
-        const uint32_t key = (i>>MEMPROT_SHIFT2)&0xffffffff;
-        khint_t k = kh_put(memprot, memprot, key, &ret);
-        if(ret) {
-            uint8_t *m = (uint8_t*)calloc(1, MEMPROT_SIZE);
-            kh_value(memprot, k) = m;
+    for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
+        if(memprot[i]==memprot_default) {
+            uint8_t* newblock = calloc(1<<16, sizeof(uint8_t));
+            if (arm64_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) {
+                free(newblock);
+            }
         }
-        const uintptr_t ii = i&(MEMPROT_SIZE-1);
-        uint8_t prot = kh_value(memprot, k)[ii];
+    for (uintptr_t i=idx; i<=end; ++i) {
+        uint32_t prot = memprot[i>>16][i&0xffff];
         if(prot&PROT_DYNAREC) {
-            kh_value(memprot, k)[ii] = prot&~PROT_DYNAREC;
-            mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_CUSTOM);
+            memprot[i>>16][i&0xffff] = prot&~PROT_DYNAREC;  // need to use atomic exchange?
+            mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_DYNAREC);
             cleanDBFromAddressRange((i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, 0);
         }
     }
     pthread_mutex_unlock(&mutex_prot);
 }
 
+int isprotectedDB(uintptr_t addr, size_t size)
+{
+    dynarec_log(LOG_DEBUG, "isprotectedDB %p -> %p\n", (void*)addr, (void*)(addr+size-1));
+    uintptr_t idx = (addr>>MEMPROT_SHIFT);
+    uintptr_t end = ((addr+size-1LL)>>MEMPROT_SHIFT);
+    if(end>=(1LL<<(20+16)))
+        end = (1LL<<(20+16))-1;
+    if(end<idx) // memory addresses higher than 48bits are not tracked
+        return 0;
+    for (uintptr_t i=idx; i<=end; ++i) {
+        uint32_t prot = memprot[i>>16][i&0xffff];
+        if(!(prot&PROT_DYNAREC)) {
+            return 0;
+        }
+    }
+    return 1;
+}
+
 #endif
 
 void updateProtection(uintptr_t addr, size_t size, uint32_t prot)
 {
     dynarec_log(LOG_DEBUG, "updateProtection %p:%p 0x%x\n", (void*)addr, (void*)(addr+size-1), prot);
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
-    uintptr_t end = ((addr+size-1LL)>>MEMPROT_SHIFT);
-    int ret;
-    uintptr_t last = idx<<MEMPROT_SHIFT;
-    uint8_t oldprot = 0xff;
+    uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT);
+    if(end>=(1LL<<(20+16)))
+        end = (1LL<<(20+16))-1;
+    if(end<idx) // memory addresses higher than 48bits are not tracked
+        return;
     pthread_mutex_lock(&mutex_prot);
-    for (uintptr_t i=idx; i<=end; ++i) {
-        const uint32_t key = (i>>MEMPROT_SHIFT2)&0xffffffff;
-        khint_t k = kh_put(memprot, memprot, key, &ret);
-        if(ret) {
-            uint8_t *m = (uint8_t*)calloc(1, MEMPROT_SIZE);
-            kh_value(memprot, k) = m;
-        }
-        const uintptr_t start = i&(MEMPROT_SIZE-1);
-        const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1);
-        uint8_t* block = kh_value(memprot, k);
-        for(uintptr_t ii = start; ii<=finish; ++ii) {
-            uint32_t dyn = block[ii]&PROT_DYNAREC;
-            if(dyn && (prot&PROT_WRITE)) {   // need to remove the write protection from this block
-                if(oldprot!=prot) {
-                    if(oldprot!=0xff)
-                        mprotect((void*)last, (i<<MEMPROT_SHIFT)-last, oldprot&~PROT_CUSTOM); // need to optimize
-                    last = i<<MEMPROT_SHIFT;
-                    oldprot = prot;
-                }
-            } else if(prot!=0xff) {
-                mprotect((void*)last, (i<<MEMPROT_SHIFT)-last, oldprot&~PROT_CUSTOM); // need to optimize
-                last = i << MEMPROT_SHIFT;
-                oldprot = 0xff;
+    for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
+        if(memprot[i]==memprot_default) {
+            uint8_t* newblock = calloc(1<<16, sizeof(uint8_t));
+#if 0 //def ARM64   //disabled for now, not usefull with the mutex
+            if (arm64_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) {
+                free(newblock);
             }
-            block[ii] = prot|dyn|PROT_ALLOC;
+#else
+            memprot[i] = newblock;
+#endif
         }
-        i+=finish-start;    // +1 from the "for" loop
+    for (uintptr_t i=idx; i<=end; ++i) {
+        uint32_t dyn=(memprot[i>>16][i&0xffff]&PROT_DYNAREC);
+        if(dyn && (prot&PROT_WRITE))    // need to remove the write protection from this block
+            mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE);
+        memprot[i>>16][i&0xffff] = prot|dyn;
     }
-    if(oldprot!=0xff)
-        mprotect((void*)last, (end<<MEMPROT_SHIFT)-last, oldprot&~PROT_CUSTOM); // need to optimize
     pthread_mutex_unlock(&mutex_prot);
 }
 
 void setProtection(uintptr_t addr, size_t size, uint32_t prot)
 {
-    dynarec_log(LOG_DEBUG, "setProtection %p:%p 0x%x\n", (void*)addr, (void*)(addr+size-1), prot);
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
-    uintptr_t end = ((addr+size-1LL)>>MEMPROT_SHIFT);
-    int ret;
+    uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT);
+    if(end>=(1LL<<(20+16)))
+        end = (1LL<<(20+16))-1;
+    if(end<idx) // memory addresses higher than 48bits are not tracked
+        return;
     pthread_mutex_lock(&mutex_prot);
-    for (uintptr_t i=idx; i<=end; ++i) {
-        const uint32_t key = (i>>MEMPROT_SHIFT2)&0xffffffff;
-        khint_t k = kh_put(memprot, memprot, key, &ret);
-        if(ret) {
-            uint8_t *m = (uint8_t*)calloc(1, MEMPROT_SIZE);
-            kh_value(memprot, k) = m;
+    for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
+        if(memprot[i]==memprot_default) {
+            uint8_t* newblock = calloc(1<<16, sizeof(uint8_t));
+#if 0 //def ARM64   //disabled for now, not usefull with the mutex
+            if (arm64_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) {
+                free(newblock);
+            }
+#else
+            memprot[i] = newblock;
+#endif
         }
-        const uintptr_t start = i&(MEMPROT_SIZE-1);
-        const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1);
-        memset(kh_value(memprot, k)+start, prot|PROT_ALLOC, finish-start+1);
-        i+=finish-start;    // +1 from the "for" loop
-    }
+    for (uintptr_t i=idx; i<=end; ++i)
+        memprot[i>>16][i&0xffff] = prot;
     pthread_mutex_unlock(&mutex_prot);
 }
 
@@ -899,18 +900,26 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot)
     dynarec_log(LOG_DEBUG, "allocProtection %p:%p 0x%x\n", (void*)addr, (void*)(addr+size-1), prot);
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
     uintptr_t end = ((addr+size-1LL)>>MEMPROT_SHIFT);
-    int ret;
+    if(end>=(1LL<<(20+16)))
+        end = (1LL<<(20+16))-1;
+    if(end<idx) // memory addresses higher than 48bits are not tracked
+        return;
     pthread_mutex_lock(&mutex_prot);
-    for (uintptr_t i=idx; i<=end; ++i) {
-        const uint32_t key = (i>>MEMPROT_SHIFT2)&0xffffffff;
-        khint_t k = kh_put(memprot, memprot, key, &ret);
-        if(ret) {
-            uint8_t *m = (uint8_t*)calloc(1, MEMPROT_SIZE);
-            kh_value(memprot, k) = m;
+    for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
+        if(memprot[i]==memprot_default) {
+            uint8_t* newblock = calloc(1<<16, sizeof(uint8_t));
+#if 0 //def ARM64   //disabled for now, not usefull with the mutex
+            if (arm64_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) {
+                free(newblock);
+            }
+#else
+            memprot[i] = newblock;
+#endif
         }
+    for (uintptr_t i=idx; i<=end; ++i) {
         const uintptr_t start = i&(MEMPROT_SIZE-1);
         const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1);
-        uint8_t* block = kh_value(memprot, k);
+        uint8_t* block = memprot[i>>16];
         for(uintptr_t ii = start; ii<=finish; ++ii) {
             if(!block[ii])
                 block[ii] = prot;
@@ -933,7 +942,7 @@ void loadProtectionFromMap()
         uintptr_t s, e;
         if(sscanf(buf, "%lx-%lx %c%c%c", &s, &e, &r, &w, &x)==5) {
             int prot = ((r=='r')?PROT_READ:0)|((w=='w')?PROT_WRITE:0)|((x=='x')?PROT_EXEC:0);
-            allocProtection(s, e-s, prot);
+            allocProtection(s, e-s, prot|PROT_ALLOC);
         }
     }
     fclose(f);
@@ -941,8 +950,9 @@ void loadProtectionFromMap()
 
 static int blockempty(uint8_t* mem)
 {
-    for (int i=0; i<(MEMPROT_SIZE); ++i)
-        if(mem[i])
+    uint32_t *p4 = (uint32_t*)mem;
+    for (int i=0; i<(MEMPROT_SIZE)/4; ++i, ++p4)
+        if(*p4)
             return 0;
     return 1;
 }
@@ -952,19 +962,35 @@ void freeProtection(uintptr_t addr, size_t size)
     dynarec_log(LOG_DEBUG, "freeProtection %p:%p\n", (void*)addr, (void*)(addr+size-1));
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
     uintptr_t end = ((addr+size-1LL)>>MEMPROT_SHIFT);
+    if(end>=(1LL<<(20+16)))
+        end = (1LL<<(20+16))-1;
+    if(end<idx) // memory addresses higher than 48bits are not tracked
+        return;
     pthread_mutex_lock(&mutex_prot);
     for (uintptr_t i=idx; i<=end; ++i) {
-        const uint32_t key = (i>>MEMPROT_SHIFT2)&0xffffffff;
-        khint_t k = kh_get(memprot, memprot, key);
-        if(k!=kh_end(memprot)) {
+        const uint32_t key = (i>>16);
+        if(memprot[key]!=memprot_default) {
             const uintptr_t start = i&(MEMPROT_SIZE-1);
             const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1);
-            uint8_t *block = kh_value(memprot, k);
+            uint8_t *block = memprot[key];
             memset(block+start, 0, finish-start+1);
+#if 0 //def ARM64   //disabled for now, not usefull with the mutex
+            if (blockempty(block)) {
+                block = (void*)arm64_lock_xchg(&memprot[key], (uintptr_t)memprot_default);
+                if(!blockempty(block)) {
+                    block = (void*)arm64_lock_xchg(&memprot[key], (uintptr_t)block);
+                    for (int i = 0; i < 0x10000; ++i) {
+                        memprot[key][i] |= block[i];
+                    }
+                }
+                if (block != memprot_default) free(block);
+            }
+#else
             if(blockempty(block)) {
+                memprot[key] = memprot_default;
                 free(block);
-                kh_del(memprot, memprot, k);
             }
+#endif
             i+=finish-start;    // +1 from the "for" loop
         }
     }
@@ -973,65 +999,52 @@ void freeProtection(uintptr_t addr, size_t size)
 
 uint32_t getProtection(uintptr_t addr)
 {
-    const uint32_t key = (addr>>32)&0xffffffff;
-    pthread_mutex_lock(&mutex_prot);
-    khint_t k = kh_get(memprot, memprot, key);
-    if(k==kh_end(memprot)) {
-        pthread_mutex_unlock(&mutex_prot);
+    if(addr>=(1LL<<48))
         return 0;
-    }
-    const uintptr_t idx = ((addr&0xffffffff)>>MEMPROT_SHIFT);
-    uint32_t ret = kh_val(memprot, k)[idx];
-    pthread_mutex_unlock(&mutex_prot);
+    const uintptr_t idx = (addr>>MEMPROT_SHIFT);
+    uint32_t ret = memprot[idx>>16][idx&0xffff];
     return ret;
 }
 
 #define LOWEST (void*)0x20000
-int availableBlock(uint8_t* p, size_t n)
-{
-    for (size_t i=0; i<n; ++i, ++p)
-        if(*p)
-            return 0;
-    return 1;
-}
 static uintptr_t nextFree(uintptr_t addr)
 {
+    if(addr>=(1LL<<48))
+        return 0;
     do {
-        const uint32_t key = (addr>>32)&0xffffffff;
-        khint_t k = kh_get(memprot, memprot, key);
-        if(k==kh_end(memprot)) {
+        uintptr_t idx = (addr>>MEMPROT_SHIFT);
+        if(memprot[idx>>16]==memprot_default) {
             return addr;
         }
-        uint8_t *block = kh_value(memprot, k);
-        for (uintptr_t i=(addr&0xffffffffLL)>>MEMPROT_SHIFT; i<MEMPROT_SIZE; ++i)
-            if(!block[i]) {
-                return (addr&~0xffffffffLL)+(i<<MEMPROT_SHIFT);
+        for (uintptr_t i=(idx&0xffff); i<MEMPROT_SIZE; ++i)
+            if(!memprot[idx>>16][i]) {
+                return ((idx>>16)<<(16+12))+(i<<MEMPROT_SHIFT);
             }
-        addr += 0x100000000LL;
-        addr &= ~0xffffffffLL;
+        addr += (1LL<<(16+12));
+        addr &= ~((1LL<<(16+12)-1LL));
     } while(1);
 }
 static uintptr_t maxFree(uintptr_t addr, uintptr_t sz)
 {
+    if(addr>=(1LL<<48))
+        return 0;
     uintptr_t mfree = 0;
     do {
-        const uint32_t key = (addr>>32)&0xffffffff;
-        khint_t k = kh_get(memprot, memprot, key);
-        if(k==kh_end(memprot)) {
-            mfree+=0x100000000LL;
+        uintptr_t idx = (addr>>MEMPROT_SHIFT);
+        if(memprot[idx>>16]==memprot_default) {
+            mfree+=(1LL<<(16+12));
             if(mfree>sz) {
                 return addr;
             }
         } else {
-            uint8_t *block = kh_value(memprot, k);
-            for (uintptr_t i=(addr&0xffffffffLL)>>MEMPROT_SHIFT; i<MEMPROT_SIZE; ++i)
-                if(!block[i]) {
-                    mfree+=1<<MEMPROT_SHIFT;
+            for (uintptr_t i=(idx&0xffffLL); i<MEMPROT_SIZE; ++i)
+                if(!memprot[idx>>16][i]) {
+                    mfree+=(1<<MEMPROT_SHIFT);
                 } else
                     return mfree;
         }
-        addr += 0x100000000LL;
-        addr &= ~0xffffffffLL;
+        addr += (1LL<<(12+16));
+        addr &= ~((1LL<<(12+16))-1LL);
     } while(1);
 }
 void* find32bitBlock(size_t size)
@@ -1042,12 +1055,10 @@ void* find47bitBlock(size_t size)
 {
     // slow iterative search... Would need something better one day
     uintptr_t addr = 0x100000000LL;
-    pthread_mutex_lock(&mutex_prot);
     do {
         addr = nextFree(addr);
         uintptr_t sz = maxFree(addr, size);
         if(sz>=size) {
-            pthread_mutex_unlock(&mutex_prot);
             return (void*)addr;
         }
         addr += sz;
@@ -1058,12 +1069,10 @@ void* find47bitBlock(size_t size)
         addr = nextFree(addr);
         uintptr_t sz = maxFree(addr, size);
         if(sz>=size) {
-            pthread_mutex_unlock(&mutex_prot);
             return (void*)addr;
         }
         addr += sz;
     } while(addr<0x100000000LL);
-    pthread_mutex_unlock(&mutex_prot);
     printf_log(LOG_NONE, "Warning: cannot find a 0x%zx block in 47bits address space\n", size);
     return NULL;
 }
@@ -1071,17 +1080,14 @@ void* find47bitBlockNearHint(void* hint, size_t size)
 {
     // slow iterative search... Would need something better one day
     uintptr_t addr = (uintptr_t)hint;
-    pthread_mutex_lock(&mutex_prot);
     do {
         addr = nextFree(addr);
         uintptr_t sz = maxFree(addr, size);
         if(sz>=size) {
-            pthread_mutex_unlock(&mutex_prot);
             return (void*)addr;
         }
         addr += sz;
     } while(addr<0x800000000000LL);
-    pthread_mutex_unlock(&mutex_prot);
     printf_log(LOG_NONE, "Warning: cannot find a 0x%zx block in 32bits address space\n", size);
     return NULL;
 }
@@ -1089,17 +1095,14 @@ void* findBlockNearHint(void* hint, size_t size)
 {
     // slow iterative search... Would need something better one day
     uintptr_t addr = (uintptr_t)hint;
-    pthread_mutex_lock(&mutex_prot);
     do {
         addr = nextFree(addr);
         uintptr_t sz = maxFree(addr, size);
         if(sz>=size) {
-            pthread_mutex_unlock(&mutex_prot);
             return (void*)addr;
         }
         addr += sz;
     } while(addr<0x100000000LL);
-    pthread_mutex_unlock(&mutex_prot);
     printf_log(LOG_NONE, "Warning: cannot find a 0x%zx block in 32bits address space\n", size);
     return NULL;
 }
@@ -1163,7 +1166,9 @@ void init_custommem_helper(box64context_t* ctx)
     if(inited) // already initialized
         return;
     inited = 1;
-    memprot = kh_init(memprot);
+    memset(memprot_default, 0, sizeof(memprot_default));
+    for(int i=0; i<(1<<20); ++i)
+        memprot[i] = memprot_default;
     init_mutexes();
 #ifdef DYNAREC
 #ifdef ARM64
@@ -1242,10 +1247,11 @@ void fini_custommem_helper(box64context_t *ctx)
     }
 #endif
     uint8_t* m;
-    kh_foreach_value(memprot, m,
-        free(m);
-    );
-    kh_destroy(memprot, memprot);
+    for(int i=0; i<(1<<20); ++i) {
+        m = memprot[i];
+        if(m!=memprot_default)
+            free(m);
+    }
 
     for(int i=0; i<n_blocks; ++i)
         #ifdef USE_MMAP
diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c
index 321bd25d..469357a7 100755
--- a/src/dynarec/dynablock.c
+++ b/src/dynarec/dynablock.c
@@ -371,8 +371,7 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t
                         dblist->maxsz = blocksz;
             }
         }
-        lockDB();
-        protectDBnolock((uintptr_t)block->x64_addr, block->x64_size);
+        protectDB((uintptr_t)block->x64_addr, block->x64_size);
         // fill-in jumptable
         addJumpTableIfDefault64(block->x64_addr, block->block);
         for(int i=0; i<block->sons_size; ++i) {
@@ -380,7 +379,6 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t
             block->sons[i]->done = 1;
         }
         block->done = 1;
-        unlockDB();
     }
 
     dynarec_log(LOG_DEBUG, " --- DynaRec Block %s @%p:%p (%p, 0x%x bytes, with %d son(s))\n", created?"created":"recycled", (void*)addr, (void*)(addr+((block)?block->x64_size:0)), (block)?block->block:0, (block)?block->size:0, (block)?block->sons_size:0);
@@ -405,13 +403,11 @@ dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, dynablock_t**
             db = internalDBGetBlock(emu, addr, addr, create, *current);
         } else {
             father->need_test = 0;
-            lockDB();
-            protectDBnolock((uintptr_t)father->x64_addr, father->x64_size);
+            protectDB((uintptr_t)father->x64_addr, father->x64_size);
             // fill back jumptable
             addJumpTableIfDefault64(father->x64_addr, father->block);
             for(int i=0; i<father->sons_size; ++i)
                 addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block);
-            unlockDB();
         }
     } 
     return db;
@@ -433,13 +429,11 @@ dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr)
             db = internalDBGetBlock(emu, addr, filladdr, create, NULL);
         } else {
             father->need_test = 0;
-            lockDB();
-            protectDBnolock((uintptr_t)father->x64_addr, father->x64_size);
+            protectDB((uintptr_t)father->x64_addr, father->x64_size);
             // fill back jumptable
             addJumpTableIfDefault64(father->x64_addr, father->block);
             for(int i=0; i<father->sons_size; ++i)
                 addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block);
-            unlockDB();
         }
     } 
     return db;
diff --git a/src/dynarec/dynarec_arm64.c b/src/dynarec/dynarec_arm64.c
index c4f4dd6a..3df317b3 100755
--- a/src/dynarec/dynarec_arm64.c
+++ b/src/dynarec/dynarec_arm64.c
@@ -338,6 +338,8 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) {
         block->done = 1;
         return (void*)block;
     }
+    // protect the 1st page
+    protectDB(addr, 1);
     // init the helper
     dynarec_arm_t helper = {0};
     helper.start = addr;
@@ -348,10 +350,16 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) {
         block->done = 1;
         return (void*)block;
     }
+    if(!isprotectedDB(addr, 1)) {
+        dynarec_log(LOG_DEBUG, "Warning, write on purge on pass0 (%p)\n", (void*)addr);
+        block->done = 1;
+        return (void*)block;
+    }
     helper.cap = helper.size+3; // needs epilog handling
     helper.insts = (instruction_arm64_t*)calloc(helper.cap, sizeof(instruction_arm64_t));
     // already protect the block and compute hash signature
-    protectDB(addr, end-addr);  //end is 1byte after actual end
+    if((addr&~0xfff)!=(end&~0xfff)) // need to protect some other pages too
+        protectDB(addr, end-addr);  //end is 1byte after actual end
     uint32_t hash = X31_hash_code((void*)addr, end-addr);
     // pass 1, addresses, x64 jump addresses, flags
     arm_pass1(&helper, addr);
@@ -453,7 +461,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) {
     block->x64_size = end-start;
     block->hash = X31_hash_code(block->x64_addr, block->x64_size);
     // Check if something changed, to abbort if it as
-    if(block->hash != hash) {
+    if((block->hash != hash) || !isprotectedDB(addr, end-addr)) {
         dynarec_log(LOG_INFO, "Warning, a block changed while beeing processed hash(%p:%ld)=%x/%x\n", block->x64_addr, block->x64_size, block->hash, hash);
         free(helper.sons_x64);
         free(helper.sons_arm);
diff --git a/src/include/custommem.h b/src/include/custommem.h
index 4e4ff6b4..156d0c11 100644
--- a/src/include/custommem.h
+++ b/src/include/custommem.h
@@ -45,10 +45,8 @@ uint32_t getProtection(uintptr_t addr);
 void loadProtectionFromMap();
 #ifdef DYNAREC
 void protectDB(uintptr_t addr, size_t size);
-void protectDBnolock(uintptr_t addr, size_t size);
 void unprotectDB(uintptr_t addr, size_t size);
-void lockDB();
-void unlockDB();
+int isprotectedDB(uintptr_t addr, size_t size);
 #endif
 void* find32bitBlock(size_t size);
 void* findBlockNearHint(void* hint, size_t size);
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index 191267c4..2d3a8aed 100755
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -310,8 +310,8 @@ uint64_t RunFunctionHandler(int* exit, x64_ucontext_t* sigcontext, uintptr_t fnc
     int oldquitonlongjmp = emu->quitonlongjmp;
     emu->quitonlongjmp = 2;
     
-    //EmuCall(emu, fnc);  // avoid DynaCall for now
-    DynaCall(emu, fnc);
+    EmuCall(emu, fnc);  // avoid DynaCall for now
+    //DynaCall(emu, fnc);
     if(nargs>6)
         R_RSP+=((nargs-6)*sizeof(void*));