about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2023-10-01 08:38:57 +0200
committerptitSeb <sebastien.chev@gmail.com>2023-10-01 08:38:57 +0200
commit1c680d3aaa1aa6b018c7902d61616b2c1257e146 (patch)
tree4d6aff4fa24de197346e099d6e67d6fc9481c344 /src
parenteed9ea35f9987f3e28ec87845c064a84f728b56f (diff)
downloadbox64-1c680d3aaa1aa6b018c7902d61616b2c1257e146.tar.gz
box64-1c680d3aaa1aa6b018c7902d61616b2c1257e146.zip
[DYNAREC] Changed lock mechanism for memory protection
Diffstat (limited to 'src')
-rw-r--r--src/custommem.c231
-rw-r--r--src/dynarec/arm64/arm64_lock.S15
-rw-r--r--src/dynarec/arm64/arm64_lock.h9
-rw-r--r--src/dynarec/native_lock.h6
-rw-r--r--src/dynarec/rv64/rv64_lock.S19
-rw-r--r--src/dynarec/rv64/rv64_lock.h6
-rw-r--r--src/include/custommem.h1
-rw-r--r--src/libtools/signals.c1
8 files changed, 203 insertions, 85 deletions
diff --git a/src/custommem.c b/src/custommem.c
index 690a24aa..69638c73 100644
--- a/src/custommem.c
+++ b/src/custommem.c
@@ -478,6 +478,62 @@ void customFree(void* p)
 }
 
 #ifdef DYNAREC
+#define GET_PROT_WAIT(A, B) \
+        uint32_t A;         \
+        do {                \
+            A = native_lock_xchg_b(&block[B], PROT_WAIT);    \
+        } while(A==PROT_WAIT)
+#define GET_PROT(A, B)      \
+        uint32_t A;         \
+        do {                \
+            A = native_lock_get_b(&block[B]);   \
+        } while(A==PROT_WAIT)
+
+#define SET_PROT(A, B)      native_lock_storeb(&block[A], B)
+#define LOCK_NODYNAREC()
+#define UNLOCK_DYNAREC()    mutex_unlock(&mutex_prot)
+#define UNLOCK_NODYNAREC()
+static uint8_t* getProtBlock(uintptr_t idx, int fill)
+{
+    uint8_t* block = (uint8_t*)native_lock_get_dd(&memprot[idx].prot);
+    if(fill && block==memprot_default) {
+        uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
+        if(native_lock_storeifref(&memprot[idx].prot, newblock, memprot_default)==newblock)
+        {
+            block = newblock;
+#ifdef TRACE_MEMSTAT
+            memprot_allocated += (1<<16) * sizeof(uint8_t);
+            if (memprot_allocated > memprot_max_allocated) memprot_max_allocated = memprot_allocated;
+#endif
+        } else {
+            box_free(newblock);
+        }
+    }
+    return block;
+}
+#else
+#define GET_PROT_WAIT(A, B) uint32_t A = block[B]
+#define GET_PROT(A, B)      uint32_t A = block[B]
+#define SET_PROT(A, B)      block[A] = B
+#define LOCK_NODYNAREC()    mutex_lock(&mutex_prot)
+#define UNLOCK_DYNAREC()
+#define UNLOCK_NODYNAREC()  mutex_unlock(&mutex_prot)
+static uint8_t* getProtBlock(uintptr_t idx, int fill)
+{
+    uint8_t* block = memprot[idx].prot;
+    if(fill && block==memprot_default) {
+        block = box_calloc(1<<16, sizeof(uint8_t));
+        memprot[idx].prot = block;
+#ifdef TRACE_MEMSTAT
+        memprot_allocated += (1<<16) * sizeof(uint8_t);
+        if (memprot_allocated > memprot_max_allocated) memprot_max_allocated = memprot_allocated;
+#endif
+    }
+    return block;
+}
+#endif
+
+#ifdef DYNAREC
 #define NCHUNK          64
 typedef struct mmaplist_s {
     blocklist_t         chunks[NCHUNK];
@@ -858,19 +914,18 @@ void protectDB(uintptr_t addr, uintptr_t size)
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
     if(end<idx) // memory addresses higher than 48bits are not tracked
         return;
-    mutex_lock(&mutex_prot);
     int ret;
-    for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
-        if(memprot[i].prot==memprot_default) {
-            uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
-            memprot[i].prot = newblock;
-#ifdef TRACE_MEMSTAT
-            memprot_allocated += (1<<16) * sizeof(uint8_t);
-            if (memprot_allocated > memprot_max_allocated) memprot_max_allocated = memprot_allocated;
-#endif
-        }
+    uintptr_t bidx = ~1LL;
+    uint8_t* block = NULL;
     for (uintptr_t i=idx; i<=end; ++i) {
-        uint32_t prot = memprot[i>>16].prot[i&0xffff];
+        if(i>>16!=bidx) {
+            bidx = i>>16;
+            block = getProtBlock(bidx, 1);
+        }
+        uint32_t prot;
+        do {
+            prot = native_lock_xchg_b(&block[i&0xffff], PROT_WAIT);
+        } while(prot==PROT_WAIT);
         uint32_t dyn = prot&PROT_DYN;
         uint32_t mapped = prot&PROT_MMAP;
         if(!prot)
@@ -878,13 +933,14 @@ void protectDB(uintptr_t addr, uintptr_t size)
         prot&=~PROT_CUSTOM;
         if(!(dyn&PROT_NOPROT)) {
             if(prot&PROT_WRITE) {
-                if(!dyn) mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE);
-                    memprot[i>>16].prot[i&0xffff] = prot|mapped|PROT_DYNAREC;   // need to use atomic exchange?
-                } else 
-                    memprot[i>>16].prot[i&0xffff] = prot|mapped|PROT_DYNAREC_R;
+                if(!dyn) 
+                    mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE);
+                prot |= mapped|PROT_DYNAREC;
+            } else 
+                prot |= mapped|PROT_DYNAREC_R;
         }
+        native_lock_storeb(&block[i&0xffff], prot);
     }
-    mutex_unlock(&mutex_prot);
 }
 
// Add the Write flag from an address range, and mark all block as dirty
@@ -898,25 +954,27 @@ void unprotectDB(uintptr_t addr, size_t size, int mark)
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
     if(end<idx) // memory addresses higher than 48bits are not tracked
         return;
-    mutex_lock(&mutex_prot);
     for (uintptr_t i=idx; i<=end; ++i) {
-        if(memprot[i>>16].prot==memprot_default) {
+        uint8_t* block = getProtBlock(i>>16, 0);
+        if(block == memprot_default) {
             i=(((i>>16)+1)<<16)-1;  // next block
         } else {
-            uint32_t prot = memprot[i>>16].prot[i&0xffff];
+            uint32_t prot;
+            do {
+                prot = native_lock_xchg_b(&block[i&0xffff], PROT_WAIT);
+            } while(prot==PROT_WAIT);
             if(!(prot&PROT_NOPROT)) {
                 if(prot&PROT_DYNAREC) {
                     prot&=~PROT_DYN;
                     if(mark)
                         cleanDBFromAddressRange((i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, 0);
                     mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_MMAP);
-                    memprot[i>>16].prot[i&0xffff] = prot;  // need to use atomic exchange?
                 } else if(prot&PROT_DYNAREC_R)
-                    memprot[i>>16].prot[i&0xffff] = prot&~PROT_CUSTOM;
+                    prot &= ~PROT_CUSTOM;
             }
+            native_lock_storeb(&block[i&0xffff], prot);
         }
     }
-    mutex_unlock(&mutex_prot);
 }
 
 int isprotectedDB(uintptr_t addr, size_t size)
@@ -931,7 +989,11 @@ int isprotectedDB(uintptr_t addr, size_t size)
         return 0;
     }
     for (uintptr_t i=idx; i<=end; ++i) {
-        uint32_t prot = memprot[i>>16].prot[i&0xffff];
+        uint8_t* block = getProtBlock(i>>16, 0);
+        uint32_t prot;
+        do {
+            prot = native_lock_get_b(&block[i&0xffff]);
+        } while(prot==PROT_WAIT);
         if(!(prot&PROT_DYN)) {
             dynarec_log(LOG_DEBUG, "0\n");
             return 0;
@@ -1046,17 +1108,15 @@ void updateProtection(uintptr_t addr, size_t size, uint32_t prot)
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
     mutex_lock(&mutex_prot);
     addMapMem(addr, addr+size-1);
-    for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
-        if(memprot[i].prot==memprot_default) {
-            uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
-            memprot[i].prot = newblock;
-#ifdef TRACE_MEMSTAT
-            memprot_allocated += (1<<16) * sizeof(uint8_t);
-            if (memprot_allocated > memprot_max_allocated) memprot_max_allocated = memprot_allocated;
-#endif
-        }
+    UNLOCK_DYNAREC();
+    uintptr_t bidx = ~1LL;
+    uint8_t *block = NULL;
     for (uintptr_t i=idx; i<=end; ++i) {
-        uint32_t old_prot = memprot[i>>16].prot[i&0xffff];
+        if(bidx!=i>>16) {
+            bidx = i>>16;
+            block = getProtBlock(bidx, 1);
+        }
+        GET_PROT_WAIT(old_prot, i&0xffff);
         uint32_t dyn=(old_prot&PROT_DYN);
         uint32_t mapped=(old_prot&PROT_MMAP);
         if(!(dyn&PROT_NOPROT)) {
@@ -1067,9 +1127,9 @@ void updateProtection(uintptr_t addr, size_t size, uint32_t prot)
                 dyn = PROT_DYNAREC_R;
             }
         }
-        memprot[i>>16].prot[i&0xffff] = prot|dyn|mapped;
+        SET_PROT(i&0xffff, prot|dyn|mapped);
     }
-    mutex_unlock(&mutex_prot);
+    UNLOCK_NODYNAREC();
 }
 
 void setProtection(uintptr_t addr, size_t size, uint32_t prot)
@@ -1080,23 +1140,17 @@ void setProtection(uintptr_t addr, size_t size, uint32_t prot)
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
     mutex_lock(&mutex_prot);
     addMapMem(addr, addr+size-1);
+    UNLOCK_DYNAREC();
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) {
-        if(memprot[i].prot==memprot_default && prot) {
-            uint8_t* newblock = box_calloc(MEMPROT_SIZE, sizeof(uint8_t));
-            memprot[i].prot = newblock;
-#ifdef TRACE_MEMSTAT
-            memprot_allocated += (1<<16) * sizeof(uint8_t);
-            if (memprot_allocated > memprot_max_allocated) memprot_max_allocated = memprot_allocated;
-#endif
-        }
-        if(prot || memprot[i].prot!=memprot_default) {
+        uint8_t* block = getProtBlock(i, prot?1:0);
+        if(prot || block!=memprot_default) {
             uintptr_t bstart = ((i<<16)<idx)?(idx&0xffff):0;
             uintptr_t bend = (((i<<16)+0xffff)>end)?(end&0xffff):0xffff;
             for (uintptr_t j=bstart; j<=bend; ++j)
-                memprot[i].prot[j] = prot;
+                SET_PROT(j, prot);
         }
     }
-    mutex_unlock(&mutex_prot);
+    UNLOCK_NODYNAREC();
 }
 
 void setProtection_mmap(uintptr_t addr, size_t size, uint32_t prot)
@@ -1112,16 +1166,15 @@ void setProtection_mmap(uintptr_t addr, size_t size, uint32_t prot)
 
 void refreshProtection(uintptr_t addr)
 {
-    mutex_lock(&mutex_prot);
+    LOCK_NODYNAREC();
     uintptr_t idx = (addr>>MEMPROT_SHIFT);
-    if(memprot[idx>>16].prot!=memprot_default) {
-        int prot = memprot[idx>>16].prot[idx&0xffff];
-        if(!(prot&PROT_DYNAREC)) {
-            int ret = mprotect((void*)(idx<<MEMPROT_SHIFT), box64_pagesize, prot&~PROT_CUSTOM);
+    uint8_t* block = getProtBlock(idx>>16, 0);
+    if(block!=memprot_default) {
+        GET_PROT(prot, idx&0xffff);
+        int ret = mprotect((void*)(idx<<MEMPROT_SHIFT), box64_pagesize, prot&~PROT_CUSTOM);
 printf_log(LOG_INFO, "refreshProtection(%p): %p/0x%x (ret=%d/%s)\n", (void*)addr, (void*)(idx<<MEMPROT_SHIFT), prot, ret, ret?strerror(errno):"ok");
-        }
     }
-    mutex_unlock(&mutex_prot);
+    UNLOCK_NODYNAREC();
 }
 
 void allocProtection(uintptr_t addr, size_t size, uint32_t prot)
@@ -1133,23 +1186,8 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot)
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
     mutex_lock(&mutex_prot);
     addMapMem(addr, addr+size-1);
-    // don't need to add precise tracking probably
-    /*for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
-        if(memprot[i].prot==memprot_default) {
-            uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
-            memprot[i].prot = newblock;
-        }
-    for (uintptr_t i=idx; i<=end; ++i) {
-        const uintptr_t start = i&(MEMPROT_SIZE-1);
-        const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1);
-        uint8_t* block = memprot[i>>16].prot;
-        for(uintptr_t ii = start; ii<=finish; ++ii) {
-            if(!block[ii])
-                block[ii] = prot;
-        }
-        i+=finish-start;    // +1 from the "for" loop
-    }*/
     mutex_unlock(&mutex_prot);
+    // don't need to add precise tracking probably
 }
 
 #ifdef DYNAREC
@@ -1157,7 +1195,7 @@ int IsInHotPage(uintptr_t addr) {
     if(addr>=(1LL<<48))
         return 0;
     int idx = (addr>>MEMPROT_SHIFT)>>16;
-    uint8_t *hot = memprot[idx].hot;
+    uint8_t *hot = (uint8_t*)native_lock_get_dd(&memprot[idx].hot);
     if(!hot)
         return 0;
     int base = (addr>>MEMPROT_SHIFT)&0xffff;
@@ -1178,7 +1216,7 @@ int AreaInHotPage(uintptr_t start, uintptr_t end_) {
     }
     int ret = 0;
     for (uintptr_t i=idx; i<=end; ++i) {
-        uint8_t *block = memprot[i>>16].hot;
+        uint8_t *block = (uint8_t*)native_lock_get_dd(&memprot[i>>16].hot);
         int base = i&0xffff;
         if(block) {
             uint32_t hot = block[base];
@@ -1255,19 +1293,31 @@ void freeProtection(uintptr_t addr, size_t size)
         end = (1LL<<(48-MEMPROT_SHIFT))-1;
     mutex_lock(&mutex_prot);
     removeMapMem(addr, addr+size-1);
+    UNLOCK_DYNAREC();
     for (uintptr_t i=idx; i<=end; ++i) {
         const uint32_t key = (i>>16);
         const uintptr_t start = i&(MEMPROT_SIZE-1);
         const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1);
-        if(memprot[key].prot!=memprot_default) {
-            uint8_t *block = memprot[key].prot;
+        if(getProtBlock(key, 0)!=memprot_default) {
             if(start==0 && finish==MEMPROT_SIZE-1) {
+                #ifdef DYNAREC
+                uint8_t *block = (uint8_t*)native_lock_xchg_dd(&memprot[key].prot, (uintptr_t)memprot_default);
+                #else
+                uint8_t *block = memprot[key].prot; 
                 memprot[key].prot = memprot_default;
-                box_free(block);
+                #endif
+                if(block!=memprot_default) {
+                    box_free(block);
 #ifdef TRACE_MEMSTAT
-                memprot_allocated -= (1<<16) * sizeof(uint8_t);
+                    memprot_allocated -= (1<<16) * sizeof(uint8_t);
 #endif
+                }
             } else {
+                #ifdef DYNAREC
+                uint8_t *block = (uint8_t*)native_lock_get_dd(&memprot[key].prot);
+                #else
+                uint8_t *block = memprot[key].prot;
+                #endif
                 memset(block+start, 0, (finish-start+1)*sizeof(uint8_t));
                 // blockempty is quite slow, so disable the free of blocks for now
                 /*else if(blockempty(block)) {
@@ -1276,27 +1326,39 @@ void freeProtection(uintptr_t addr, size_t size)
                 }*/
             }
         }
+        #ifdef DYNAREC
+        if(native_lock_get_dd(&memprot[key].hot) && start==0 && finish==MEMPROT_SIZE-1) {
+            uint8_t *hot = (uint8_t*)native_lock_xchg_dd(&memprot[key].hot, 0);
+            if(hot) {
+                box_free(hot);
+#ifdef TRACE_MEMSTAT
+                memprot_allocated -= (1<<16) * sizeof(uint8_t);
+#endif
+            }
+        }
+        #else
         if(memprot[key].hot && start==0 && finish==MEMPROT_SIZE-1) {
-            uint8_t *hot = memprot[key].hot;
+            box_free(memprot[key].hot);
             memprot[key].hot = NULL;
-            box_free(hot);
 #ifdef TRACE_MEMSTAT
             memprot_allocated -= (1<<16) * sizeof(uint8_t);
 #endif
         }
+        #endif
         i+=finish-start;    // +1 from the "for" loop
     }
-    mutex_unlock(&mutex_prot);
+    UNLOCK_NODYNAREC();
 }
 
 uint32_t getProtection(uintptr_t addr)
 {
     if(addr>=(1LL<<48))
         return 0;
-    mutex_lock(&mutex_prot);
+    LOCK_NODYNAREC();
     const uintptr_t idx = (addr>>MEMPROT_SHIFT);
-    uint32_t ret = memprot[idx>>16].prot[idx&0xffff];
-    mutex_unlock(&mutex_prot);
+    uint8_t *block = getProtBlock(idx>>16, 0);
+    GET_PROT(ret, idx&0xffff);
+    UNLOCK_NODYNAREC();
     return ret&~PROT_MMAP;
 }
 
@@ -1304,10 +1366,11 @@ int getMmapped(uintptr_t addr)
 {
     if(addr>=(1LL<<48))
         return 0;
-    mutex_lock(&mutex_prot);
+    LOCK_NODYNAREC();
     const uintptr_t idx = (addr>>MEMPROT_SHIFT);
-    uint32_t ret = memprot[idx>>16].prot[idx&0xffff];
-    mutex_unlock(&mutex_prot);
+    uint8_t *block = getProtBlock(idx>>16, 0);
+    GET_PROT(ret, idx&0xffff);
+    UNLOCK_NODYNAREC();
     return (ret&PROT_MMAP)?1:0;
 }
 
diff --git a/src/dynarec/arm64/arm64_lock.S b/src/dynarec/arm64/arm64_lock.S
index 139625d6..e57137e7 100644
--- a/src/dynarec/arm64/arm64_lock.S
+++ b/src/dynarec/arm64/arm64_lock.S
@@ -30,6 +30,9 @@
 .global arm64_lock_decifnot0
 .global arm64_lock_store
 .global arm64_lock_store_dd
+.global arm64_lock_get_b
+.global arm64_lock_get_d
+.global arm64_lock_get_dd
 
 arm64_lock_read_b:
     dmb     ish
@@ -260,3 +263,15 @@ arm64_lock_store_dd:
     str     x1, [x0]
     dmb     ish
     ret
+
+arm64_lock_get_b:
+    ldaxrb  w0, [x0]
+    ret
+
+arm64_lock_get_d:
+    ldaxr   w0, [x0]
+    ret
+
+arm64_lock_get_dd:
+    ldaxr   x0, [x0]
+    ret
diff --git a/src/dynarec/arm64/arm64_lock.h b/src/dynarec/arm64/arm64_lock.h
index 6572adc4..1c827d00 100644
--- a/src/dynarec/arm64/arm64_lock.h
+++ b/src/dynarec/arm64/arm64_lock.h
@@ -72,4 +72,13 @@ extern void arm64_lock_store(void*p, uint32_t v);
 // atomic store (with memory barrier)
 extern void arm64_lock_store_dd(void*p, uint64_t v);
 
+// atomic get (with memory barrier)
+extern uint8_t arm64_lock_get_b(void*p);
+
+// atomic get (with memory barrier)
+extern uint32_t arm64_lock_get_d(void*p);
+
+// atomic get (with memory barrier)
+extern void* arm64_lock_get_dd(void*p);
+
 #endif  //__ARM64_LOCK__H__
diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h
index a27ca514..70a8eaa8 100644
--- a/src/dynarec/native_lock.h
+++ b/src/dynarec/native_lock.h
@@ -29,6 +29,9 @@
 #define native_lock_decifnot0(A)            arm64_lock_decifnot0(A)

 #define native_lock_store(A, B)             arm64_lock_store(A, B)

 #define native_lock_store_dd(A, B)          arm64_lock_store_dd(A, B)

+#define native_lock_get_b(A)                arm64_lock_get_b(A)

+#define native_lock_get_d(A)                arm64_lock_get_d(A)

+#define native_lock_get_dd(A)               arm64_lock_get_dd(A)

 

 #elif defined(RV64)

 #include "rv64/rv64_lock.h"

@@ -70,6 +73,9 @@
 // there is no atomic move on 16bytes, so faking it

 #define native_lock_read_dq(A, B, C)        *A=tmpcas=((uint64_t*)(C))[0]; *B=((uint64_t*)(C))[1];

 #define native_lock_write_dq(A, B, C)       rv64_lock_cas_dq(C, A, tmpcas, B);

+#define native_lock_get_b(A)                rv64_lock_get_b(A)

+#define native_lock_get_d(A)                rv64_lock_get_d(A)

+#define native_lock_get_dd(A)               rv64_lock_get_dd(A)

 

 #else

 #error Unsupported architecture

diff --git a/src/dynarec/rv64/rv64_lock.S b/src/dynarec/rv64/rv64_lock.S
index 505e3f31..5d53730c 100644
--- a/src/dynarec/rv64/rv64_lock.S
+++ b/src/dynarec/rv64/rv64_lock.S
@@ -21,6 +21,8 @@
 .global rv64_lock_cas_d
 .global rv64_lock_cas_dd
 .global rv64_lock_cas_dq
+.global rv64_lock_get_d
+.global rv64_lock_get_dd
 
 rv64_lock_xchg_dd:
     // address is a0, value is a1, return old value in a0
@@ -235,4 +237,19 @@ rv64_lock_cas_dq:
     ret
 1:
     li a0, 1
-    ret
\ No newline at end of file
+    ret
+
+rv64_lock_get_b:
+    fence   rw, rw
+    lb      a0, 0(a0)
+    ret
+
+rv64_lock_get_d:
+    fence   rw, rw
+    lw      a0, 0(a0)
+    ret
+
+rv64_lock_get_dd:
+    fence   rw, rw
+    ld      a0, 0(a0)
+    ret
diff --git a/src/dynarec/rv64/rv64_lock.h b/src/dynarec/rv64/rv64_lock.h
index c59b876d..67fc9ed6 100644
--- a/src/dynarec/rv64/rv64_lock.h
+++ b/src/dynarec/rv64/rv64_lock.h
@@ -50,6 +50,12 @@ extern void rv64_lock_store_dd(void*p, uint64_t v);
 // (mostly) Atomically store val1 and val2 at [p] if old [p] is ref. Return 0 if OK, 1 is not. p needs to be aligned
 extern int rv64_lock_cas_dq(void* p, uint64_t ref, uint64_t val1, uint64_t val2);
 
+// atomic get (with memory barrier)
+extern uint32_t rv64_lock_get_d(void* p);
+
+// atomic get (with memory barrier)
+extern void* rv64_lock_get_dd(void* p);
+
 // Not defined in assembler but in dynarec_rv64_functions
 uint8_t extract_byte(uint32_t val, void* address);
 uint32_t insert_byte(uint32_t val, uint8_t b, void* address);
diff --git a/src/include/custommem.h b/src/include/custommem.h
index 13e73c4f..d3536ae3 100644
--- a/src/include/custommem.h
+++ b/src/include/custommem.h
@@ -56,6 +56,7 @@ uintptr_t getJumpAddress64(uintptr_t addr);
 #define PROT_MMAP       0x10
 #define PROT_DYN        (PROT_DYNAREC | PROT_DYNAREC_R | PROT_NOPROT)
 #define PROT_CUSTOM     (PROT_DYNAREC | PROT_DYNAREC_R | PROT_MMAP | PROT_NOPROT)
+#define PROT_WAIT       0xFF
 
 void updateProtection(uintptr_t addr, size_t size, uint32_t prot);
 void setProtection(uintptr_t addr, size_t size, uint32_t prot);
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index 1c3c117a..99816c30 100644
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -1141,6 +1141,7 @@ exit(-1);
                 old_addr = (uintptr_t)addr;
                 refreshProtection(old_addr);
                 relockMutex(Locks);
+                sched_yield();  // give time to the other process
                 return; // that's probably just a multi-task glitch, like seen in terraria
             }
             old_addr = 0;