path: root/src/custommem.c
author    ptitSeb <sebastien.chev@gmail.com>  2022-10-31 12:05:04 +0100
committer GitHub <noreply@github.com>  2022-10-31 12:05:04 +0100
commit    8459ea0d31a851e847318273233c9a8b2b079eae (patch)
tree      2ab39af3c19d9720d906cd060211107c5dc549a2 /src/custommem.c
parent    494c245b591cef1a34cc4d4dc52163861d4e54af (diff)
download  box64-8459ea0d31a851e847318273233c9a8b2b079eae.tar.gz
          box64-8459ea0d31a851e847318273233c9a8b2b079eae.zip
Remove dynablocklist (#438)
* [DYNAREC] Removed dynablock sons handling
* [DYNAREC] Removed dynablocklist, and attached dynablocks to the jumptable
* [DYNAREC] Changed handling of HotPage (part of Protection now)
* [DYNAREC] Fixed some performance issues (especially with heavily threaded programs)
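
The core of the change is visible throughout the diff below: instead of a separate dynablocklist radix map (dynmap123), a dynablock is now found through the same 4-level jump table the dynarec already uses for dispatch, with the owning dynablock_t* stored just before each published native entry point. As a minimal, self-contained sketch of the 4x16-bit index split that every box64_jmptbl3[idx3][idx2][idx1][idx0] walk in this file relies on (names and values here are illustrative, not box64 API):

    #include <stdint.h>
    #include <stdio.h>

    /* Illustrative only: split a 64-bit guest address into the four
       16-bit indices used by the jump-table walks in the diff below. */
    int main(void)
    {
        uintptr_t addr = 0x00007f12345678abULL;
        for (int level = 3; level >= 0; --level)
            printf("idx%d = 0x%04llx\n", level,
                   (unsigned long long)((addr >> (level * 16)) & 0xffff));
        return 0;
    }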
Diffstat (limited to 'src/custommem.c')
-rw-r--r--  src/custommem.c  373
1 file changed, 240 insertions(+), 133 deletions(-)
diff --git a/src/custommem.c b/src/custommem.c
index e8abdd86..3264b8ec 100644
--- a/src/custommem.c
+++ b/src/custommem.c
@@ -26,12 +26,12 @@
 #ifdef DYNAREC
 #include "dynablock.h"
 #include "dynarec/native_lock.h"
+#include "dynarec/dynarec_next.h"
 
 //#define USE_MMAP
 
 // init inside dynablocks.c
 KHASH_MAP_INIT_INT64(dynablocks, dynablock_t*)
-static dynablocklist_t***  dynmap123[1<<DYNAMAP_SHIFT] = {0}; // 64bits.. in 4x16bits array
 static mmaplist_t          *mmaplist = NULL;
 static size_t              mmapsize = 0;
 static size_t              mmapcap = 0;
@@ -60,8 +60,13 @@ static pthread_mutex_t     mutex_prot;
 #endif
 #define MEMPROT_SIZE (1<<16)
 #define MEMPROT_SIZE0 (48-MEMPROT_SHIFT2)
-static uint8_t *volatile memprot[1<<MEMPROT_SIZE0];    // x86_64 mem is 48bits, page is 12bits, so memory is tracked as [20][16][page protection]
-static uint8_t memprot_default[MEMPROT_SIZE];
+typedef struct memprot_s
+{
+    uint8_t* prot;
+    uint8_t* hot;
+} memprot_t;
+static memprot_t memprot[1<<MEMPROT_SIZE0];    // x86_64 mem is 48bits, page is 12bits, so memory is tracked as [20][16][page protection]
+static uint8_t   memprot_default[MEMPROT_SIZE];
 static int inited = 0;
 
 typedef struct mapmem_s {
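
The memprot table changes here from a flat array of protection pages to a struct pairing two per-64k-region planes: prot (the existing protection bytes) and hot (the new HotPage counters, allocated lazily). A compilable sketch of the two-level indexing both planes share, assuming MEMPROT_SHIFT is the usual 12-bit page shift (an assumption; its value is defined earlier in custommem.c):

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SHIFT 12   /* stand-in for MEMPROT_SHIFT (assumption) */

    int main(void)
    {
        uintptr_t addr = 0x00007f1234567000ULL;
        uintptr_t page = addr >> PAGE_SHIFT;  /* 48-bit space -> 36 bits of pages */
        printf("memprot[0x%llx].prot[0x%llx]\n",
               (unsigned long long)(page >> 16),    /* which 64k plane */
               (unsigned long long)(page & 0xffff)); /* which page in it */
        return 0;
    }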
@@ -82,6 +87,7 @@ typedef struct blocklist_s {
 
 static pthread_mutex_t     mutex_blocks;
 static int                 n_blocks = 0;       // number of blocks for custom malloc
+static int                 c_blocks = 0;       // capacity of blocks for custom malloc
 static blocklist_t*        p_blocks = NULL;    // actual blocks for custom malloc
 
 typedef union mark_s {
@@ -273,11 +279,18 @@ static size_t sizeBlock(void* sub)
     return s->next.size;
 }
 
+#define THRESHOLD   (128-2*sizeof(blockmark_t))
+
 static size_t roundSize(size_t size)
 {
     if(!size)
         return size;
-    return (size+7)&~7LL;   // 8 bytes align in size
+    size = (size+7)&~7LL;   // 8 bytes align in size
+
+    if(size<THRESHOLD)
+        size = THRESHOLD;
+
+    return size;
 }
 
 void* customMalloc(size_t size)
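
roundSize() now clamps every request to a minimum chunk size, and customMalloc() (next hunk) absorbs a leftover smaller than THRESHOLD into the allocation instead of leaving an unusable sliver behind. A standalone model of the new rounding, assuming blockmark_t is 8 bytes (the real struct is defined earlier in this file):

    #include <stddef.h>
    #include <stdio.h>

    #define BLOCKMARK_SIZE 8                      /* assumed sizeof(blockmark_t) */
    #define THRESHOLD (128 - 2 * BLOCKMARK_SIZE)  /* = 112 under that assumption */

    static size_t round_size(size_t size)
    {
        if (!size)
            return size;
        size = (size + 7) & ~(size_t)7;   /* 8-byte alignment, as before */
        if (size < THRESHOLD)
            size = THRESHOLD;             /* new: no chunk smaller than THRESHOLD */
        return size;
    }

    int main(void)
    {
        printf("10 -> %zu, 200 -> %zu\n", round_size(10), round_size(200)); /* 112, 200 */
        return 0;
    }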
@@ -285,12 +298,15 @@ void* customMalloc(size_t size)
     size = roundSize(size);
     // look for free space
     void* sub = NULL;
+    size_t fullsize = size+2*sizeof(blockmark_t);
     pthread_mutex_lock(&mutex_blocks);
     for(int i=0; i<n_blocks; ++i) {
         if(p_blocks[i].maxfree>=size) {
             size_t rsize = 0;
-            sub = getFirstBlock(p_blocks[i].first, size, &rsize, NULL);
+            sub = getFirstBlock(p_blocks[i].block, size, &rsize, p_blocks[i].first);
             if(sub) {
+                if(rsize-size<THRESHOLD)
+                    size = rsize;
                 void* ret = allocBlock(p_blocks[i].block, sub, size, NULL);
                 if(sub==p_blocks[i].first)
                     p_blocks[i].first = getNextFreeBlock(sub);
@@ -303,10 +319,11 @@ void* customMalloc(size_t size)
     }
     // add a new block
     int i = n_blocks++;
-    p_blocks = (blocklist_t*)box_realloc(p_blocks, n_blocks*sizeof(blocklist_t));
-    size_t allocsize = MMAPSIZE;
-    if(size+2*sizeof(blockmark_t)>allocsize)
-        allocsize = size+2*sizeof(blockmark_t);
+    if(n_blocks>c_blocks) {
+        c_blocks += 4;
+        p_blocks = (blocklist_t*)box_realloc(p_blocks, c_blocks*sizeof(blocklist_t));
+    }
+    size_t allocsize = (fullsize>MMAPSIZE)?fullsize:MMAPSIZE;
     #ifdef USE_MMAP
     void* p = mmap(NULL, allocsize, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
     memset(p, 0, allocsize);
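
Besides the THRESHOLD absorption, the hunk above stops calling box_realloc on every new block: c_blocks tracks the capacity of p_blocks and grows four entries at a time, so most appends skip the realloc. A minimal model of that append pattern (names are illustrative; error handling is elided as in the original box_realloc path):

    #include <stdlib.h>
    #include <string.h>

    typedef struct { void *block; size_t maxfree; } entry_t;  /* illustrative */

    static entry_t *entries = NULL;
    static int n_entries = 0, c_entries = 0;

    static entry_t *append_entry(void)
    {
        int i = n_entries++;
        if (n_entries > c_entries) {   /* grow capacity only when exhausted */
            c_entries += 4;
            entries = realloc(entries, c_entries * sizeof(entry_t));
        }
        memset(&entries[i], 0, sizeof(entry_t));
        return &entries[i];
    }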
@@ -610,18 +627,29 @@ void FreeDynarecMap(dynablock_t* db, uintptr_t addr, size_t size)
     ActuallyFreeDynarecMap(db, addr, size);
 }
 
-dynablocklist_t* getDB(uintptr_t idx)
+dynablock_t* getDB(uintptr_t addr)
 {
-    // already 16bits shifted
-    uintptr_t idx3 = (idx>>32)&((1<<DYNAMAP_SHIFT)-1);
-    uintptr_t idx2 = (idx>>16)&((1<<DYNAMAP_SHIFT)-1);
-    uintptr_t idx1 = (idx    )&((1<<DYNAMAP_SHIFT)-1);
-
-    if(!dynmap123[idx3])
-        return NULL;
-    if(!dynmap123[idx3][idx2])
+    if(isJumpTableDefault64((void*)addr))
         return NULL;
-    return dynmap123[idx3][idx2][idx1];
+    uintptr_t ret = getJumpAddress64(addr);
+    return *(dynablock_t**)(ret-sizeof(void*));
+}
+uintptr_t getDefaultSize(uintptr_t addr)
+{
+    uintptr_t idx3, idx2, idx1, idx0;
+    idx3 = (((uintptr_t)addr)>>48)&0xffff;
+    if(box64_jmptbl3[idx3] == box64_jmptbldefault2)
+        return (addr&~((1LL<<48)-1)|0xffffffffffffLL)-addr + 1;
+    idx2 = (((uintptr_t)addr)>>32)&0xffff;
+    if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1)
+        return (addr&~((1LL<<32)-1)|0xffffffffLL)-addr + 1;
+    idx1 = (((uintptr_t)addr)>>16)&0xffff;
+    if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0)
+        return (addr&~((1LL<<16)-1)|0xffffLL)-addr + 1;
+    idx0 = addr&0xffff;
+    while(idx0<0x10000 && box64_jmptbl3[idx3][idx2][idx1][idx0]==(uintptr_t)native_next)
+        ++idx0;
+    return idx0 - (addr&0xffff);
 }
 
 // each dynmap is 64k of size
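
Two conventions make the new getDB()/getDefaultSize() pair work. First, the dynarec is assumed to write the owning dynablock_t* into the word immediately preceding each native entry point it publishes in the jump table, so a populated slot can be mapped back to its block. Second, a slot still holding native_next means "no block here", and getDefaultSize() reports how far the default run extends so cleanDBFromAddressRange() (below) can skip whole untouched 64k/4G/256T spans instead of probing byte by byte. A runnable sketch of the recovery step:

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    typedef struct dynablock_s { int id; } dynablock_t;

    int main(void)
    {
        /* lay out [dynablock_t* | native code...] as the dynarec is
           assumed to do for every published entry point */
        unsigned char *buf = malloc(sizeof(void *) + 64);
        dynablock_t blk = { 42 };
        *(dynablock_t **)buf = &blk;
        uintptr_t entry = (uintptr_t)(buf + sizeof(void *)); /* jump-table value */

        /* getDB()'s recovery: step back one pointer from the entry */
        dynablock_t *db = *(dynablock_t **)(entry - sizeof(void *));
        printf("recovered id=%d\n", db->id);
        free(buf);
        return 0;
    }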
@@ -629,87 +657,29 @@ dynablocklist_t* getDB(uintptr_t idx)
 void addDBFromAddressRange(uintptr_t addr, size_t size)
 {
     dynarec_log(LOG_DEBUG, "addDBFromAddressRange %p -> %p\n", (void*)addr, (void*)(addr+size-1));
-    uintptr_t idx = (addr>>DYNAMAP_SHIFT);
-    uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT);
-    for (uintptr_t i=idx; i<=end; ++i) {
-        int idx3 = (i>>32)&((1<<DYNAMAP_SHIFT)-1);
-        int idx2 = (i>>16)&((1<<DYNAMAP_SHIFT)-1);
-        int idx1 = (i    )&((1<<DYNAMAP_SHIFT)-1);
-        if(!dynmap123[idx3]) {
-            dynablocklist_t*** p = (dynablocklist_t***)box_calloc(1<<DYNAMAP_SHIFT, sizeof(dynablocklist_t**));
-            if(native_lock_storeifnull(&dynmap123[idx3], p)!=p)
-                box_free(p);
-        }
-        if(!dynmap123[idx3][idx2]) {
-            dynablocklist_t** p = (dynablocklist_t**)box_calloc(1<<DYNAMAP_SHIFT, sizeof(dynablocklist_t*));
-            if(native_lock_storeifnull(&dynmap123[idx3][idx2], p)!=p)
-                box_free(p);
-        }
-        if(!dynmap123[idx3][idx2][idx1]) {
-            dynablocklist_t* p = NewDynablockList(i<<DYNAMAP_SHIFT, 1<<DYNAMAP_SHIFT, 0);
-            if(native_lock_storeifnull(&dynmap123[idx3][idx2][idx1], p)!=p)
-                FreeDynablockList(&p);
-        }
-    }
-}
-
-static int dynmapempty(void** mem)
-{
-    for (int i=0; i<(1<<DYNAMAP_SHIFT); ++i)
-        if(mem[i])
-            return 0;
-    return 1;
+    // do nothing: dynablocks are allowed based on memory protection flags
 }
 
 void cleanDBFromAddressRange(uintptr_t addr, size_t size, int destroy)
 {
-    dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p -> %p %s\n", (void*)addr, (void*)(addr+size-1), destroy?"destroy":"mark");
-    uintptr_t idx = (addr>>DYNAMAP_SHIFT);
-    uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT);
-    for (uintptr_t i=idx; i<=end; ++i) {
-        int idx3 = (i>>32)&((1<<DYNAMAP_SHIFT)-1);
-        int idx2 = (i>>16)&((1<<DYNAMAP_SHIFT)-1);
-        int idx1 = (i    )&((1<<DYNAMAP_SHIFT)-1);
-        if(dynmap123[idx3] && dynmap123[idx3][idx2]) {
-            dynablocklist_t* dblist = dynmap123[idx3][idx2][idx1];
-            if(dblist) {
-                if(destroy) {
-                    if(FreeRangeDynablock(dblist, addr, size) && 0) {    // dblist is empty, check if we can delete more...
-                        // disabling this for now. It seems to cause random crash in Terraria
-                        if(!native_lock_storeifref(&dynmap123[idx3][idx2][idx1], NULL, dblist)) {
-                            dynablocklist_t** p = dynmap123[idx3][idx2];
-                            if(dynmapempty((void**)p)) {
-                                if(!native_lock_storeifref(&dynmap123[idx3][idx2], NULL, p)) {
-                                    dynablocklist_t*** p2 = dynmap123[idx3];
-                                    if(dynmapempty((void**)p2)) {
-                                        if(!native_lock_storeifref(&dynmap123[idx3], NULL, p2)) {
-                                            box_free(p2);
-                                        }
-                                    }
-                                    box_free(p);
-                                }
-                            }
-                            FreeDynablockList(&dblist);
-                        }
-                    }
-                } else
-                    MarkRangeDynablock(dblist, addr, size);
-            }
+    uintptr_t start_addr = my_context?((addr<my_context->max_db_size)?0:(addr-my_context->max_db_size)):addr;
+    dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p/%p -> %p %s\n", (void*)addr, (void*)start_addr, (void*)(addr+size-1), destroy?"destroy":"mark");
+    for (uintptr_t i=start_addr; i<addr+size; ++i) {
+        dynablock_t* db = getDB(i);
+        if(db) {
+            if(destroy)
+                FreeRangeDynablock(db, addr, size);
+            else
+                MarkRangeDynablock(db, addr, size);
+        } else {
+            uintptr_t next = getDefaultSize(i);
+            if(next)
+                i+=next-1;
         }
     }
 }
 
-#ifdef ARM64
-void arm64_next(void);
-#define native_next arm64_next
-#elif defined(LA464)
-void la464_next(void);
-#define native_next la464_next
-#else
-#error Unsupported architecture
-#endif
-
-void addJumpTableIfDefault64(void* addr, void* jmp)
+int addJumpTableIfDefault64(void* addr, void* jmp)
 {
     uintptr_t idx3, idx2, idx1, idx0;
     idx3 = (((uintptr_t)addr)>>48)&0xffff;
@@ -738,7 +708,7 @@ void addJumpTableIfDefault64(void* addr, void* jmp)
             box_free(tbl);
     }
 
-    native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], jmp, native_next);
+    return (native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], jmp, native_next)==jmp)?1:0;
 }
 void setJumpTableDefault64(void* addr)
 {
@@ -755,6 +725,51 @@ void setJumpTableDefault64(void* addr)
     idx0 = (((uintptr_t)addr)    )&0xffff;
     box64_jmptbl3[idx3][idx2][idx1][idx0] = (uintptr_t)native_next;
 }
+void setJumpTableDefaultRef64(void* addr, void* jmp)
+{
+    uintptr_t idx3, idx2, idx1, idx0;
+    idx3 = (((uintptr_t)addr)>>48)&0xffff;
+    if(box64_jmptbl3[idx3] == box64_jmptbldefault2)
+        return;
+    idx2 = (((uintptr_t)addr)>>32)&0xffff;
+    if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1)
+        return;
+    idx1 = (((uintptr_t)addr)>>16)&0xffff;
+    if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0)
+        return;
+    idx0 = (((uintptr_t)addr)    )&0xffff;
+    native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], native_next, jmp);
+}
+int setJumpTableIfRef64(void* addr, void* jmp, void* ref)
+{
+    uintptr_t idx3, idx2, idx1, idx0;
+    idx3 = (((uintptr_t)addr)>>48)&0xffff;
+    idx2 = (((uintptr_t)addr)>>32)&0xffff;
+    idx1 = (((uintptr_t)addr)>>16)&0xffff;
+    idx0 = (((uintptr_t)addr)    )&0xffff;
+    if(box64_jmptbl3[idx3] == box64_jmptbldefault2) {
+        uintptr_t*** tbl = (uintptr_t***)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t**));
+        for(int i=0; i<(1<<JMPTABL_SHIFT); ++i)
+            tbl[i] = box64_jmptbldefault1;
+        if(native_lock_storeifref(&box64_jmptbl3[idx3], tbl, box64_jmptbldefault2)!=tbl)
+            box_free(tbl);
+    }
+    if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) {
+        uintptr_t** tbl = (uintptr_t**)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t*));
+        for(int i=0; i<(1<<JMPTABL_SHIFT); ++i)
+            tbl[i] = box64_jmptbldefault0;
+        if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2], tbl, box64_jmptbldefault1)!=tbl)
+            box_free(tbl);
+    }
+    if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) {
+        uintptr_t* tbl = (uintptr_t*)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t));
+        for(int i=0; i<(1<<JMPTABL_SHIFT); ++i)
+            tbl[i] = (uintptr_t)native_next;
+        if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1], tbl, box64_jmptbldefault0)!=tbl)
+            box_free(tbl);
+    }
+    return (native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], jmp, ref)==jmp)?1:0;
+}
 int isJumpTableDefault64(void* addr)
 {
     uintptr_t idx3, idx2, idx1, idx0;
@@ -807,6 +822,38 @@ uintptr_t getJumpTableAddress64(uintptr_t addr)
     return (uintptr_t)&box64_jmptbl3[idx3][idx2][idx1][idx0];
 }
 
+uintptr_t getJumpAddress64(uintptr_t addr)
+{
+    uintptr_t idx3, idx2, idx1, idx0;
+    idx3 = ((addr)>>48)&0xffff;
+    idx2 = ((addr)>>32)&0xffff;
+    idx1 = ((addr)>>16)&0xffff;
+    idx0 = ((addr)    )&0xffff;
+    if(box64_jmptbl3[idx3] == box64_jmptbldefault2) {
+        uintptr_t*** tbl = (uintptr_t***)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t**));
+        for(int i=0; i<(1<<JMPTABL_SHIFT); ++i)
+            tbl[i] = box64_jmptbldefault1;
+        if(native_lock_storeifref(&box64_jmptbl3[idx3], tbl, box64_jmptbldefault2)!=tbl)
+            box_free(tbl);
+    }
+    if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) {
+        uintptr_t** tbl = (uintptr_t**)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t*));
+        for(int i=0; i<(1<<JMPTABL_SHIFT); ++i)
+            tbl[i] = box64_jmptbldefault0;
+        if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2], tbl, box64_jmptbldefault1)!=tbl)
+            box_free(tbl);
+    }
+    if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) {
+        uintptr_t* tbl = (uintptr_t*)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t));
+        for(int i=0; i<(1<<JMPTABL_SHIFT); ++i)
+            tbl[i] = (uintptr_t)native_next;
+        if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1], tbl, box64_jmptbldefault0)!=tbl)
+            box_free(tbl);
+    }
+
+    return (uintptr_t)box64_jmptbl3[idx3][idx2][idx1][idx0];
+}
+
 // Remove the Write flag from an address range, so DB can be executed safely
 void protectDB(uintptr_t addr, uintptr_t size)
 {
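
getJumpAddress64() above (like addJumpTableIfDefault64 and setJumpTableIfRef64) materializes missing table levels on demand with a publish-or-discard idiom: build a fully initialized level, try to swap it in with native_lock_storeifref, and free it if another thread won the race. A sketch of that idiom, using the GCC/Clang atomic builtin as a stand-in for the platform lock primitive:

    #include <stdlib.h>

    /* Publish a freshly built table into *slot only if it still holds the
       shared default; otherwise discard ours and use the winner's. */
    static void *publish_level(void **slot, void *deflt, size_t bytes)
    {
        void *fresh = malloc(bytes);
        /* ... fill fresh[] with default entries before publishing ... */
        if (!__sync_bool_compare_and_swap(slot, deflt, fresh)) {
            free(fresh);      /* lost the race */
            return *slot;
        }
        return fresh;
    }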
@@ -820,24 +867,24 @@ void protectDB(uintptr_t addr, uintptr_t size)
     pthread_mutex_lock(&mutex_prot);
     int ret;
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
-        if(memprot[i]==memprot_default) {
+        if(memprot[i].prot==memprot_default) {
             uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
             /*if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) {
                 box_free(newblock);
             }*/
-            memprot[i] = newblock;
+            memprot[i].prot = newblock;
         }
     for (uintptr_t i=idx; i<=end; ++i) {
-        uint32_t prot = memprot[i>>16][i&0xffff];
+        uint32_t prot = memprot[i>>16].prot[i&0xffff];
         uint32_t dyn = prot&PROT_CUSTOM;
         prot&=~PROT_CUSTOM;
         if(!prot)
             prot = PROT_READ | PROT_WRITE | PROT_EXEC;      // comes from malloc & co, so should not be able to execute
         if((prot&PROT_WRITE)) {
             if(!dyn) mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE);
-            memprot[i>>16][i&0xffff] = prot|PROT_DYNAREC;   // need to use atomic exchange?
+            memprot[i>>16].prot[i&0xffff] = prot|PROT_DYNAREC;   // need to use atomic exchange?
         } else 
-            memprot[i>>16][i&0xffff] = prot|PROT_DYNAREC_R;
+            memprot[i>>16].prot[i&0xffff] = prot|PROT_DYNAREC_R;
     }
     pthread_mutex_unlock(&mutex_prot);
 }
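
protectDB() implements the write-tracking half of the scheme: a page that still has PROT_WRITE loses it via mprotect and is tagged PROT_DYNAREC, so a later write faults and lets the handler invalidate the affected dynablocks; already read-only pages are merely tagged (PROT_DYNAREC_R). A reduced sketch of the per-page decision, with illustrative flag values (not the real PROT_DYNAREC constants):

    #include <sys/mman.h>
    #include <stdint.h>

    #define FLAG_DYNAREC   0x80   /* illustrative, not the real PROT_DYNAREC   */
    #define FLAG_DYNAREC_R 0x40   /* illustrative, not the real PROT_DYNAREC_R */

    static void protect_page(void *page, size_t pagesize, uint8_t *entry)
    {
        uint8_t prot = *entry & (PROT_READ | PROT_WRITE | PROT_EXEC);
        if (prot & PROT_WRITE) {
            mprotect(page, pagesize, prot & ~PROT_WRITE);  /* writes now fault */
            *entry = prot | FLAG_DYNAREC;
        } else {
            *entry = prot | FLAG_DYNAREC_R;  /* nothing to strip, just tag */
        }
    }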
@@ -855,23 +902,23 @@ void unprotectDB(uintptr_t addr, size_t size, int mark)
         return;
     pthread_mutex_lock(&mutex_prot);
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
-        if(memprot[i]==memprot_default) {
+        if(memprot[i].prot==memprot_default) {
             uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
             /*if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) {
                 box_free(newblock);
             }*/
-            memprot[i] = newblock;
+            memprot[i].prot = newblock;
         }
     for (uintptr_t i=idx; i<=end; ++i) {
-        uint32_t prot = memprot[i>>16][i&0xffff];
+        uint32_t prot = memprot[i>>16].prot[i&0xffff];
         if(prot&PROT_DYNAREC) {
             prot&=~PROT_CUSTOM;
             if(mark)
                 cleanDBFromAddressRange((i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, 0);
             mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot);
-            memprot[i>>16][i&0xffff] = prot;  // need to use atomic exchange?
+            memprot[i>>16].prot[i&0xffff] = prot;  // need to use atomic exchange?
         } else if(prot&PROT_DYNAREC_R)
-            memprot[i>>16][i&0xffff] = prot&~PROT_CUSTOM;
+            memprot[i>>16].prot[i&0xffff] = prot&~PROT_CUSTOM;
     }
     pthread_mutex_unlock(&mutex_prot);
 }
@@ -888,7 +935,7 @@ int isprotectedDB(uintptr_t addr, size_t size)
         return 0;
     }
     for (uintptr_t i=idx; i<=end; ++i) {
-        uint32_t prot = memprot[i>>16][i&0xffff];
+        uint32_t prot = memprot[i>>16].prot[i&0xffff];
         if(!(prot&PROT_DYNAREC || prot&PROT_DYNAREC_R)) {
             dynarec_log(LOG_DEBUG, "0\n");
             return 0;
@@ -1002,25 +1049,25 @@ void updateProtection(uintptr_t addr, size_t size, uint32_t prot)
         return;
     pthread_mutex_lock(&mutex_prot);
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
-        if(memprot[i]==memprot_default) {
+        if(memprot[i].prot==memprot_default) {
             uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
 #if 0 //def ARM64   //disabled for now, not useful with the mutex
             if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) {
                 box_free(newblock);
             }
 #else
-            memprot[i] = newblock;
+            memprot[i].prot = newblock;
 #endif
         }
     for (uintptr_t i=idx; i<=end; ++i) {
-        uint32_t dyn=(memprot[i>>16][i&0xffff]&(PROT_DYNAREC | PROT_DYNAREC_R));
+        uint32_t dyn=(memprot[i>>16].prot[i&0xffff]&(PROT_DYNAREC | PROT_DYNAREC_R));
         if(dyn && (prot&PROT_WRITE)) {   // need to remove the write protection from this block
             dyn = PROT_DYNAREC;
             mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE);
         } else if(dyn && !(prot&PROT_WRITE)) {
             dyn = PROT_DYNAREC_R;
         }
-        memprot[i>>16][i&0xffff] = prot|dyn;
+        memprot[i>>16].prot[i&0xffff] = prot|dyn;
     }
     pthread_mutex_unlock(&mutex_prot);
 }
@@ -1036,18 +1083,18 @@ void setProtection(uintptr_t addr, size_t size, uint32_t prot)
         return;
     pthread_mutex_lock(&mutex_prot);
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
-        if(memprot[i]==memprot_default) {
+        if(memprot[i].prot==memprot_default) {
             uint8_t* newblock = box_calloc(MEMPROT_SIZE, sizeof(uint8_t));
 #if 0 //def ARM64   //disabled for now, not useful with the mutex
             if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) {
                 box_free(newblock);
             }
 #else
-            memprot[i] = newblock;
+            memprot[i].prot = newblock;
 #endif
         }
     for (uintptr_t i=idx; i<=end; ++i)
-        memprot[i>>16][i&0xffff] = prot;
+        memprot[i>>16].prot[i&0xffff] = prot;
     pthread_mutex_unlock(&mutex_prot);
 }
 
@@ -1063,20 +1110,20 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot)
         return;
     pthread_mutex_lock(&mutex_prot);
     for (uintptr_t i=(idx>>16); i<=(end>>16); ++i)
-        if(memprot[i]==memprot_default) {
+        if(memprot[i].prot==memprot_default) {
             uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
 #if 0 //def ARM64   //disabled for now, not useful with the mutex
             if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) {
                 box_free(newblock);
             }
 #else
-            memprot[i] = newblock;
+            memprot[i].prot = newblock;
 #endif
         }
     for (uintptr_t i=idx; i<=end; ++i) {
         const uintptr_t start = i&(MEMPROT_SIZE-1);
         const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1);
-        uint8_t* block = memprot[i>>16];
+        uint8_t* block = memprot[i>>16].prot;
         for(uintptr_t ii = start; ii<=finish; ++ii) {
             if(!block[ii])
                 block[ii] = prot;
@@ -1086,6 +1133,70 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot)
     pthread_mutex_unlock(&mutex_prot);
 }
 
+#ifdef DYNAREC
+#define HOTPAGE_STEP 16
+int IsInHotPage(uintptr_t addr) {
+    if(addr>=(1LL<<48))    // addresses above 48 bits are not tracked
+        return 0;
+    int idx = (addr>>MEMPROT_SHIFT)>>16;
+    if(!memprot[idx].hot)
+        return 0;
+    int base = (addr>>MEMPROT_SHIFT)&0xffff;
+    if(!memprot[idx].hot[base])
+        return 0;
+    // decrement hot
+    pthread_mutex_lock(&mutex_prot);
+    memprot[idx].hot[base]-=1;
+    pthread_mutex_unlock(&mutex_prot);
+    return 1;
+}
+
+int AreaInHotPage(uintptr_t start, uintptr_t end_) {
+    //dynarec_log(LOG_DEBUG, "AreaInHotPage %p -> %p => ", (void*)start, (void*)end_);
+    uintptr_t idx = (start>>MEMPROT_SHIFT);
+    uintptr_t end = (end_>>MEMPROT_SHIFT);
+    if(end>=(1LL<<(48-MEMPROT_SHIFT)))
+        end = (1LL<<(48-MEMPROT_SHIFT))-1LL;
+    if(end<idx) { // memory addresses higher than 48bits are not tracked
+        //dynarec_log(LOG_DEBUG, "00\n");
+        return 0;
+    }
+    for (uintptr_t i=idx; i<=end; ++i) {
+        uint8_t *block = memprot[i>>16].hot;
+        uint32_t hot = block?block[i&0xffff]:0;
+        if(hot) {
+            // decrement hot
+            pthread_mutex_lock(&mutex_prot);
+            block[i&0xffff]-=1;
+            pthread_mutex_unlock(&mutex_prot);
+            //dynarec_log(LOG_DEBUG, "1\n");
+            return 1;
+        }
+    }
+    //dynarec_log(LOG_DEBUG, "0\n");
+    return 0;
+
+}
+
+void AddHotPage(uintptr_t addr) {
+    int idx = (addr>>MEMPROT_SHIFT)>>16;
+    int base = (addr>>MEMPROT_SHIFT)&0xffff;
+    pthread_mutex_lock(&mutex_prot);
+    if(!memprot[idx].hot) {
+            uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t));
+#if 0 //def ARM64   //disabled for now, not useful with the mutex
+            if (native_lock_storeifref(&memprot[idx].hot, newblock, NULL) != newblock) {
+                box_free(newblock);
+            }
+#else
+            memprot[idx].hot = newblock;
+#endif
+    }
+    memprot[idx].hot[base] = HOTPAGE_STEP;
+    pthread_mutex_unlock(&mutex_prot);
+}
+#endif
+
 void loadProtectionFromMap()
 {
     if(box64_mapclean)
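
The new HotPage machinery above replaces the old standalone hot-page handling: AddHotPage() arms a per-page counter at HOTPAGE_STEP, and every IsInHotPage()/AreaInHotPage() hit consumes one tick, so a page that is being actively rewritten suppresses recompilation for a bounded number of probes and then cools off on its own. A toy model of that decay:

    #include <stdint.h>
    #include <stdio.h>

    #define HOTPAGE_STEP 16

    static uint8_t hot;   /* one tracked page for the demo */

    static void add_hot(void) { hot = HOTPAGE_STEP; }
    static int  is_hot(void)  { return hot ? (hot--, 1) : 0; }

    int main(void)
    {
        add_hot();
        int probes = 0;
        while (is_hot())
            ++probes;
        printf("stayed hot for %d probes\n", probes);   /* prints 16 */
        return 0;
    }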
@@ -1130,11 +1241,11 @@ void freeProtection(uintptr_t addr, size_t size)
     pthread_mutex_lock(&mutex_prot);
     for (uintptr_t i=idx; i<=end; ++i) {
         const uint32_t key = (i>>16);
-        if(memprot[key]!=memprot_default) {
+        if(memprot[key].prot!=memprot_default) {
             const uintptr_t start = i&(MEMPROT_SIZE-1);
             const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1);
-            uint8_t *block = memprot[key];
-            memset(block+start, 0, finish-start+1);
+            uint8_t *block = memprot[key].prot;
+            memset(block+start, 0, (finish-start+1)*sizeof(uint8_t));
             // blockempty is quite slow, so disable the free of blocks for now
 #if 0 //def ARM64   //disabled for now, not useful with the mutex
             if (blockempty(block)) {
@@ -1149,8 +1260,13 @@ void freeProtection(uintptr_t addr, size_t size)
             }
 #else
             if(start==0 && finish==MEMPROT_SIZE-1) {
-                memprot[key] = memprot_default;
+                memprot[key].prot = memprot_default;
                 box_free(block);
+                if(memprot[key].hot) {
+                    uint8_t *hot = memprot[key].hot;
+                    memprot[key].hot = NULL;
+                    box_free(hot);
+                }
             }
             /*else if(blockempty(block)) {
                 memprot[key] = memprot_default;
@@ -1168,7 +1284,7 @@ uint32_t getProtection(uintptr_t addr)
     if(addr>=(1LL<<48))
         return 0;
     const uintptr_t idx = (addr>>MEMPROT_SHIFT);
-    uint32_t ret = memprot[idx>>16][idx&0xffff];
+    uint32_t ret = memprot[idx>>16].prot[idx&0xffff];
     return ret;
 }
 
@@ -1272,7 +1388,7 @@ void init_custommem_helper(box64context_t* ctx)
     inited = 1;
     memset(memprot_default, 0, sizeof(memprot_default));
     for(int i=0; i<(1<<MEMPROT_SIZE0); ++i)
-        memprot[i] = memprot_default;
+        memprot[i].prot = memprot_default;
     init_mutexes();
 #ifdef DYNAREC
     if(box64_dynarec)
@@ -1328,18 +1444,6 @@ void fini_custommem_helper(box64context_t *ctx)
         }
         mmapsize = 0;
         mmapcap = 0;
-        dynarec_log(LOG_DEBUG, "Free dynamic Dynarecblocks\n");
-        for (uintptr_t idx3=0; idx3<=0xffff; ++idx3)
-            if(dynmap123[idx3]) {
-                for (uintptr_t idx2=0; idx2<=0xffff; ++idx2)
-                    if(dynmap123[idx3][idx2]) {
-                        for (uintptr_t idx1=0; idx1<=0xffff; ++idx1)
-                            if(dynmap123[idx3][idx2][idx1])
-                                FreeDynablockList(&dynmap123[idx3][idx2][idx1]);
-                        box_free(dynmap123[idx3][idx2]);
-                    }
-                box_free(dynmap123[idx3]);
-            }
 
         box_free(mmaplist);
         for (int i3=0; i3<(1<<DYNAMAP_SHIFT); ++i3)
@@ -1360,9 +1464,12 @@ void fini_custommem_helper(box64context_t *ctx)
 #endif
     uint8_t* m;
     for(int i=0; i<(1<<MEMPROT_SIZE0); ++i) {
-        m = memprot[i];
+        m = memprot[i].prot;
         if(m!=memprot_default)
             box_free(m);
+        m = memprot[i].hot;
+        if(m)
+            box_free(m);
     }
 
     for(int i=0; i<n_blocks; ++i)