| author | ptitSeb <sebastien.chev@gmail.com> | 2022-10-31 12:05:04 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-10-31 12:05:04 +0100 |
| commit | 8459ea0d31a851e847318273233c9a8b2b079eae (patch) | |
| tree | 2ab39af3c19d9720d906cd060211107c5dc549a2 /src/custommem.c | |
| parent | 494c245b591cef1a34cc4d4dc52163861d4e54af (diff) | |
| download | box64-8459ea0d31a851e847318273233c9a8b2b079eae.tar.gz box64-8459ea0d31a851e847318273233c9a8b2b079eae.zip | |
Remove dynablocklist (#438)
* [DYNAREC] Removed dynablock sons handling
* [DYNAREC] Removed dynablocklist, and attached dynablocks to the jumptable
* [DYNAREC] Changed handling of HotPage (part of Protection now)
* [DYNAREC] Fixed some performance issues (especially with heavily threaded programs)
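The second bullet is the core of the change: instead of looking a dynablock up in a separate dynablocklist, the jump-table entry now points at the generated code, and the owning `dynablock_t*` is stored in the word immediately in front of that entry, so the new `getDB()` is just a read at `entry - sizeof(void*)` (see `getJumpAddress64()` in the diff below). A minimal, self-contained sketch of that layout trick; `my_block_t`, `make_entry` and `entry_owner` are illustrative names, not box64 API:

```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>

/* Illustrative stand-in for a dynablock descriptor. */
typedef struct my_block_s {
    uintptr_t x64_addr;
    size_t    size;
} my_block_t;

/* Allocate a buffer laid out as [owner pointer][code...] and return the
 * address of the code part, i.e. what the jump table would point at. */
static void* make_entry(my_block_t* owner, const void* code, size_t code_size)
{
    uint8_t* p = malloc(sizeof(void*) + code_size);
    if (!p) return NULL;
    memcpy(p, &owner, sizeof(void*));            /* owner sits just before the entry */
    memcpy(p + sizeof(void*), code, code_size);  /* then the (fake) generated code   */
    return p + sizeof(void*);
}

/* Recover the owner from an entry address, like the new getDB() does. */
static my_block_t* entry_owner(void* entry)
{
    return *(my_block_t**)((uint8_t*)entry - sizeof(void*));
}

int main(void)
{
    my_block_t db = { .x64_addr = 0x401000, .size = 64 };
    uint8_t fake_code[4] = { 0xC3, 0x90, 0x90, 0x90 };
    void* entry = make_entry(&db, fake_code, sizeof(fake_code));
    printf("owner of entry %p starts at guest %#lx\n",
           entry, (unsigned long)entry_owner(entry)->x64_addr);
    free((uint8_t*)entry - sizeof(void*));
    return 0;
}
```

Keeping the owner pointer right next to the code means the lookup needs no separate map and stays O(1), which is what lets the three-level dynmap123 structure be deleted.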
Diffstat (limited to 'src/custommem.c')
| -rw-r--r-- | src/custommem.c | 373 |
1 file changed, 240 insertions, 133 deletions
diff --git a/src/custommem.c b/src/custommem.c index e8abdd86..3264b8ec 100644 --- a/src/custommem.c +++ b/src/custommem.c @@ -26,12 +26,12 @@ #ifdef DYNAREC #include "dynablock.h" #include "dynarec/native_lock.h" +#include "dynarec/dynarec_next.h" //#define USE_MMAP // init inside dynablocks.c KHASH_MAP_INIT_INT64(dynablocks, dynablock_t*) -static dynablocklist_t*** dynmap123[1<<DYNAMAP_SHIFT] = {0}; // 64bits.. in 4x16bits array static mmaplist_t *mmaplist = NULL; static size_t mmapsize = 0; static size_t mmapcap = 0; @@ -60,8 +60,13 @@ static pthread_mutex_t mutex_prot; #endif #define MEMPROT_SIZE (1<<16) #define MEMPROT_SIZE0 (48-MEMPROT_SHIFT2) -static uint8_t *volatile memprot[1<<MEMPROT_SIZE0]; // x86_64 mem is 48bits, page is 12bits, so memory is tracked as [20][16][page protection] -static uint8_t memprot_default[MEMPROT_SIZE]; +typedef struct memprot_s +{ + uint8_t* prot; + uint8_t* hot; +} memprot_t; +static memprot_t memprot[1<<MEMPROT_SIZE0]; // x86_64 mem is 48bits, page is 12bits, so memory is tracked as [20][16][page protection] +static uint8_t memprot_default[MEMPROT_SIZE]; static int inited = 0; typedef struct mapmem_s { @@ -82,6 +87,7 @@ typedef struct blocklist_s { static pthread_mutex_t mutex_blocks; static int n_blocks = 0; // number of blocks for custom malloc +static int c_blocks = 0; // capacity of blocks for custom malloc static blocklist_t* p_blocks = NULL; // actual blocks for custom malloc typedef union mark_s { @@ -273,11 +279,18 @@ static size_t sizeBlock(void* sub) return s->next.size; } +#define THRESHOLD (128-2*sizeof(blockmark_t)) + static size_t roundSize(size_t size) { if(!size) return size; - return (size+7)&~7LL; // 8 bytes align in size + size = (size+7)&~7LL; // 8 bytes align in size + + if(size<THRESHOLD) + size = THRESHOLD; + + return size; } void* customMalloc(size_t size) @@ -285,12 +298,15 @@ void* customMalloc(size_t size) size = roundSize(size); // look for free space void* sub = NULL; + size_t fullsize = size+2*sizeof(blockmark_t); pthread_mutex_lock(&mutex_blocks); for(int i=0; i<n_blocks; ++i) { if(p_blocks[i].maxfree>=size) { size_t rsize = 0; - sub = getFirstBlock(p_blocks[i].first, size, &rsize, NULL); + sub = getFirstBlock(p_blocks[i].block, size, &rsize, p_blocks[i].first); if(sub) { + if(rsize-size<THRESHOLD) + size = rsize; void* ret = allocBlock(p_blocks[i].block, sub, size, NULL); if(sub==p_blocks[i].first) p_blocks[i].first = getNextFreeBlock(sub); @@ -303,10 +319,11 @@ void* customMalloc(size_t size) } // add a new block int i = n_blocks++; - p_blocks = (blocklist_t*)box_realloc(p_blocks, n_blocks*sizeof(blocklist_t)); - size_t allocsize = MMAPSIZE; - if(size+2*sizeof(blockmark_t)>allocsize) - allocsize = size+2*sizeof(blockmark_t); + if(n_blocks>c_blocks) { + c_blocks += 4; + p_blocks = (blocklist_t*)box_realloc(p_blocks, c_blocks*sizeof(blocklist_t)); + } + size_t allocsize = (fullsize>MMAPSIZE)?fullsize:MMAPSIZE; #ifdef USE_MMAP void* p = mmap(NULL, allocsize, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); memset(p, 0, allocsize); @@ -610,18 +627,29 @@ void FreeDynarecMap(dynablock_t* db, uintptr_t addr, size_t size) ActuallyFreeDynarecMap(db, addr, size); } -dynablocklist_t* getDB(uintptr_t idx) +dynablock_t* getDB(uintptr_t addr) { - // already 16bits shifted - uintptr_t idx3 = (idx>>32)&((1<<DYNAMAP_SHIFT)-1); - uintptr_t idx2 = (idx>>16)&((1<<DYNAMAP_SHIFT)-1); - uintptr_t idx1 = (idx )&((1<<DYNAMAP_SHIFT)-1); - - if(!dynmap123[idx3]) - return NULL; - if(!dynmap123[idx3][idx2]) + 
if(isJumpTableDefault64((void*)addr)) return NULL; - return dynmap123[idx3][idx2][idx1]; + uintptr_t ret = getJumpAddress64(addr); + return *(dynablock_t**)(ret-sizeof(void*)); +} +uintptr_t getDefaultSize(uintptr_t addr) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = (((uintptr_t)addr)>>48)&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) + return (addr&~((1LL<<48)-1)|0xffffffffffffLL)-addr + 1; + idx2 = (((uintptr_t)addr)>>32)&0xffff; + if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) + return (addr&~((1LL<<32)-1)|0xffffffffLL)-addr + 1; + idx1 = (((uintptr_t)addr)>>16)&0xffff; + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) + return (addr&~((1LL<<16)-1)|0xffffLL)-addr + 1; + idx0 = addr&0xffff; + while(idx0<0x10000 && box64_jmptbl3[idx3][idx2][idx1][idx0]==(uintptr_t)native_next) + ++idx0; + return idx0 - (addr&0xffff); } // each dynmap is 64k of size @@ -629,87 +657,29 @@ dynablocklist_t* getDB(uintptr_t idx) void addDBFromAddressRange(uintptr_t addr, size_t size) { dynarec_log(LOG_DEBUG, "addDBFromAddressRange %p -> %p\n", (void*)addr, (void*)(addr+size-1)); - uintptr_t idx = (addr>>DYNAMAP_SHIFT); - uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT); - for (uintptr_t i=idx; i<=end; ++i) { - int idx3 = (i>>32)&((1<<DYNAMAP_SHIFT)-1); - int idx2 = (i>>16)&((1<<DYNAMAP_SHIFT)-1); - int idx1 = (i )&((1<<DYNAMAP_SHIFT)-1); - if(!dynmap123[idx3]) { - dynablocklist_t*** p = (dynablocklist_t***)box_calloc(1<<DYNAMAP_SHIFT, sizeof(dynablocklist_t**)); - if(native_lock_storeifnull(&dynmap123[idx3], p)!=p) - box_free(p); - } - if(!dynmap123[idx3][idx2]) { - dynablocklist_t** p = (dynablocklist_t**)box_calloc(1<<DYNAMAP_SHIFT, sizeof(dynablocklist_t*)); - if(native_lock_storeifnull(&dynmap123[idx3][idx2], p)!=p) - box_free(p); - } - if(!dynmap123[idx3][idx2][idx1]) { - dynablocklist_t* p = NewDynablockList(i<<DYNAMAP_SHIFT, 1<<DYNAMAP_SHIFT, 0); - if(native_lock_storeifnull(&dynmap123[idx3][idx2][idx1], p)!=p) - FreeDynablockList(&p); - } - } -} - -static int dynmapempty(void** mem) -{ - for (int i=0; i<(1<<DYNAMAP_SHIFT); ++i) - if(mem[i]) - return 0; - return 1; + // do nothing, dynablock are allowed based on memory protection flags } void cleanDBFromAddressRange(uintptr_t addr, size_t size, int destroy) { - dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p -> %p %s\n", (void*)addr, (void*)(addr+size-1), destroy?"destroy":"mark"); - uintptr_t idx = (addr>>DYNAMAP_SHIFT); - uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT); - for (uintptr_t i=idx; i<=end; ++i) { - int idx3 = (i>>32)&((1<<DYNAMAP_SHIFT)-1); - int idx2 = (i>>16)&((1<<DYNAMAP_SHIFT)-1); - int idx1 = (i )&((1<<DYNAMAP_SHIFT)-1); - if(dynmap123[idx3] && dynmap123[idx3][idx2]) { - dynablocklist_t* dblist = dynmap123[idx3][idx2][idx1]; - if(dblist) { - if(destroy) { - if(FreeRangeDynablock(dblist, addr, size) && 0) { // dblist is empty, check if we can delete more... - // disabling this for now. 
It seems to cause random crash in Terraria - if(!native_lock_storeifref(&dynmap123[idx3][idx2][idx1], NULL, dblist)) { - dynablocklist_t** p = dynmap123[idx3][idx2]; - if(dynmapempty((void**)p)) { - if(!native_lock_storeifref(&dynmap123[idx3][idx2], NULL, p)) { - dynablocklist_t*** p2 = dynmap123[idx3]; - if(dynmapempty((void**)p2)) { - if(!native_lock_storeifref(&dynmap123[idx3], NULL, p2)) { - box_free(p2); - } - } - box_free(p); - } - } - FreeDynablockList(&dblist); - } - } - } else - MarkRangeDynablock(dblist, addr, size); - } + uintptr_t start_addr = my_context?((addr<my_context->max_db_size)?0:(addr-my_context->max_db_size)):addr; + dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p/%p -> %p %s\n", (void*)addr, (void*)start_addr, (void*)(addr+size-1), destroy?"destroy":"mark"); + for (uintptr_t i=start_addr; i<addr+size; ++i) { + dynablock_t* db = getDB(i); + if(db) { + if(destroy) + FreeRangeDynablock(db, addr, size); + else + MarkRangeDynablock(db, addr, size); + } else { + uintptr_t next = getDefaultSize(i); + if(next) + i+=next-1; } } } -#ifdef ARM64 -void arm64_next(void); -#define native_next arm64_next -#elif defined(LA464) -void la464_next(void); -#define native_next la464_next -#else -#error Unsupported architecture -#endif - -void addJumpTableIfDefault64(void* addr, void* jmp) +int addJumpTableIfDefault64(void* addr, void* jmp) { uintptr_t idx3, idx2, idx1, idx0; idx3 = (((uintptr_t)addr)>>48)&0xffff; @@ -738,7 +708,7 @@ void addJumpTableIfDefault64(void* addr, void* jmp) box_free(tbl); } - native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], jmp, native_next); + return (native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], jmp, native_next)==jmp)?1:0; } void setJumpTableDefault64(void* addr) { @@ -755,6 +725,51 @@ void setJumpTableDefault64(void* addr) idx0 = (((uintptr_t)addr) )&0xffff; box64_jmptbl3[idx3][idx2][idx1][idx0] = (uintptr_t)native_next; } +void setJumpTableDefaultRef64(void* addr, void* jmp) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = (((uintptr_t)addr)>>48)&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) + return; + idx2 = (((uintptr_t)addr)>>32)&0xffff; + if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) + return; + idx1 = (((uintptr_t)addr)>>16)&0xffff; + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) + return; + idx0 = (((uintptr_t)addr) )&0xffff; + native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], native_next, jmp); +} +int setJumpTableIfRef64(void* addr, void* jmp, void* ref) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = (((uintptr_t)addr)>>48)&0xffff; + idx2 = (((uintptr_t)addr)>>32)&0xffff; + idx1 = (((uintptr_t)addr)>>16)&0xffff; + idx0 = (((uintptr_t)addr) )&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { + uintptr_t*** tbl = (uintptr_t***)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t**)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault1; + if(native_lock_storeifref(&box64_jmptbl3[idx3], tbl, box64_jmptbldefault2)!=tbl) + box_free(tbl); + } + if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) { + uintptr_t** tbl = (uintptr_t**)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t*)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault0; + if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2], tbl, box64_jmptbldefault1)!=tbl) + box_free(tbl); + } + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) { + uintptr_t* tbl = (uintptr_t*)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + 
tbl[i] = (uintptr_t)native_next; + if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1], tbl, box64_jmptbldefault0)!=tbl) + box_free(tbl); + } + return (native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], jmp, ref)==jmp)?1:0; +} int isJumpTableDefault64(void* addr) { uintptr_t idx3, idx2, idx1, idx0; @@ -807,6 +822,38 @@ uintptr_t getJumpTableAddress64(uintptr_t addr) return (uintptr_t)&box64_jmptbl3[idx3][idx2][idx1][idx0]; } +uintptr_t getJumpAddress64(uintptr_t addr) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = ((addr)>>48)&0xffff; + idx2 = ((addr)>>32)&0xffff; + idx1 = ((addr)>>16)&0xffff; + idx0 = ((addr) )&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { + uintptr_t*** tbl = (uintptr_t***)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t**)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault1; + if(native_lock_storeifref(&box64_jmptbl3[idx3], tbl, box64_jmptbldefault2)!=tbl) + box_free(tbl); + } + if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) { + uintptr_t** tbl = (uintptr_t**)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t*)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault0; + if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2], tbl, box64_jmptbldefault1)!=tbl) + box_free(tbl); + } + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) { + uintptr_t* tbl = (uintptr_t*)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = (uintptr_t)native_next; + if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1], tbl, box64_jmptbldefault0)!=tbl) + box_free(tbl); + } + + return (uintptr_t)box64_jmptbl3[idx3][idx2][idx1][idx0]; +} + // Remove the Write flag from an adress range, so DB can be executed safely void protectDB(uintptr_t addr, uintptr_t size) { @@ -820,24 +867,24 @@ void protectDB(uintptr_t addr, uintptr_t size) pthread_mutex_lock(&mutex_prot); int ret; for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) - if(memprot[i]==memprot_default) { + if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); /*if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { box_free(newblock); }*/ - memprot[i] = newblock; + memprot[i].prot = newblock; } for (uintptr_t i=idx; i<=end; ++i) { - uint32_t prot = memprot[i>>16][i&0xffff]; + uint32_t prot = memprot[i>>16].prot[i&0xffff]; uint32_t dyn = prot&PROT_CUSTOM; prot&=~PROT_CUSTOM; if(!prot) prot = PROT_READ | PROT_WRITE | PROT_EXEC; // comes from malloc & co, so should not be able to execute if((prot&PROT_WRITE)) { if(!dyn) mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE); - memprot[i>>16][i&0xffff] = prot|PROT_DYNAREC; // need to use atomic exchange? + memprot[i>>16].prot[i&0xffff] = prot|PROT_DYNAREC; // need to use atomic exchange? 
} else - memprot[i>>16][i&0xffff] = prot|PROT_DYNAREC_R; + memprot[i>>16].prot[i&0xffff] = prot|PROT_DYNAREC_R; } pthread_mutex_unlock(&mutex_prot); } @@ -855,23 +902,23 @@ void unprotectDB(uintptr_t addr, size_t size, int mark) return; pthread_mutex_lock(&mutex_prot); for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) - if(memprot[i]==memprot_default) { + if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); /*if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { box_free(newblock); }*/ - memprot[i] = newblock; + memprot[i].prot = newblock; } for (uintptr_t i=idx; i<=end; ++i) { - uint32_t prot = memprot[i>>16][i&0xffff]; + uint32_t prot = memprot[i>>16].prot[i&0xffff]; if(prot&PROT_DYNAREC) { prot&=~PROT_CUSTOM; if(mark) cleanDBFromAddressRange((i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, 0); mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot); - memprot[i>>16][i&0xffff] = prot; // need to use atomic exchange? + memprot[i>>16].prot[i&0xffff] = prot; // need to use atomic exchange? } else if(prot&PROT_DYNAREC_R) - memprot[i>>16][i&0xffff] = prot&~PROT_CUSTOM; + memprot[i>>16].prot[i&0xffff] = prot&~PROT_CUSTOM; } pthread_mutex_unlock(&mutex_prot); } @@ -888,7 +935,7 @@ int isprotectedDB(uintptr_t addr, size_t size) return 0; } for (uintptr_t i=idx; i<=end; ++i) { - uint32_t prot = memprot[i>>16][i&0xffff]; + uint32_t prot = memprot[i>>16].prot[i&0xffff]; if(!(prot&PROT_DYNAREC || prot&PROT_DYNAREC_R)) { dynarec_log(LOG_DEBUG, "0\n"); return 0; @@ -1002,25 +1049,25 @@ void updateProtection(uintptr_t addr, size_t size, uint32_t prot) return; pthread_mutex_lock(&mutex_prot); for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) - if(memprot[i]==memprot_default) { + if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); #if 0 //def ARM64 //disabled for now, not usefull with the mutex if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { box_free(newblock); } #else - memprot[i] = newblock; + memprot[i].prot = newblock; #endif } for (uintptr_t i=idx; i<=end; ++i) { - uint32_t dyn=(memprot[i>>16][i&0xffff]&(PROT_DYNAREC | PROT_DYNAREC_R)); + uint32_t dyn=(memprot[i>>16].prot[i&0xffff]&(PROT_DYNAREC | PROT_DYNAREC_R)); if(dyn && (prot&PROT_WRITE)) { // need to remove the write protection from this block dyn = PROT_DYNAREC; mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE); } else if(dyn && !(prot&PROT_WRITE)) { dyn = PROT_DYNAREC_R; } - memprot[i>>16][i&0xffff] = prot|dyn; + memprot[i>>16].prot[i&0xffff] = prot|dyn; } pthread_mutex_unlock(&mutex_prot); } @@ -1036,18 +1083,18 @@ void setProtection(uintptr_t addr, size_t size, uint32_t prot) return; pthread_mutex_lock(&mutex_prot); for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) - if(memprot[i]==memprot_default) { + if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(MEMPROT_SIZE, sizeof(uint8_t)); #if 0 //def ARM64 //disabled for now, not usefull with the mutex if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { box_free(newblock); } #else - memprot[i] = newblock; + memprot[i].prot = newblock; #endif } for (uintptr_t i=idx; i<=end; ++i) - memprot[i>>16][i&0xffff] = prot; + memprot[i>>16].prot[i&0xffff] = prot; pthread_mutex_unlock(&mutex_prot); } @@ -1063,20 +1110,20 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot) return; pthread_mutex_lock(&mutex_prot); for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) - if(memprot[i]==memprot_default) { 
+ if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); #if 0 //def ARM64 //disabled for now, not usefull with the mutex if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { box_free(newblock); } #else - memprot[i] = newblock; + memprot[i].prot = newblock; #endif } for (uintptr_t i=idx; i<=end; ++i) { const uintptr_t start = i&(MEMPROT_SIZE-1); const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1); - uint8_t* block = memprot[i>>16]; + uint8_t* block = memprot[i>>16].prot; for(uintptr_t ii = start; ii<=finish; ++ii) { if(!block[ii]) block[ii] = prot; @@ -1086,6 +1133,70 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot) pthread_mutex_unlock(&mutex_prot); } +#ifdef DYNAREC +#define HOTPAGE_STEP 16 +int IsInHotPage(uintptr_t addr) { + if(addr<=(1LL<<48)) + return 0; + int idx = (addr>>MEMPROT_SHIFT)>>16; + if(!memprot[idx].hot) + return 0; + int base = (addr>>MEMPROT_SHIFT)&0xffff; + if(!memprot[idx].hot[base]) + return 0; + // decrement hot + pthread_mutex_lock(&mutex_prot); + memprot[idx].hot[base]-=1; + pthread_mutex_unlock(&mutex_prot); + return 1; +} + +int AreaInHotPage(uintptr_t start, uintptr_t end_) { + //dynarec_log(LOG_DEBUG, "AreaInHotPage %p -> %p => ", (void*)start, (void*)end_); + uintptr_t idx = (start>>MEMPROT_SHIFT); + uintptr_t end = (end_>>MEMPROT_SHIFT); + if(end>=(1LL<<(48-MEMPROT_SHIFT))) + end = (1LL<<(48-MEMPROT_SHIFT))-1LL; + if(end<idx) { // memory addresses higher than 48bits are not tracked + //dynarec_log(LOG_DEBUG, "00\n"); + return 0; + } + for (uintptr_t i=idx; i<=end; ++i) { + uint8_t *block = memprot[i>>16].hot; + uint32_t hot = block?block[i&0xffff]:0; + if(hot) { + // decrement hot + pthread_mutex_lock(&mutex_prot); + block[i&0xffff]-=1; + pthread_mutex_unlock(&mutex_prot); + //dynarec_log(LOG_DEBUG, "1\n"); + return 1; + } + } + //dynarec_log(LOG_DEBUG, "0\n"); + return 0; + +} + +void AddHotPage(uintptr_t addr) { + int idx = (addr>>MEMPROT_SHIFT)>>16; + int base = (addr>>MEMPROT_SHIFT)&0xffff; + pthread_mutex_lock(&mutex_prot); + if(!memprot[idx].hot) { + uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); +#if 0 //def ARM64 //disabled for now, not usefull with the mutex + if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { + box_free(newblock); + } +#else + memprot[idx].hot = newblock; +#endif + } + memprot[idx].hot[base] = HOTPAGE_STEP; + pthread_mutex_unlock(&mutex_prot); +} +#endif + void loadProtectionFromMap() { if(box64_mapclean) @@ -1130,11 +1241,11 @@ void freeProtection(uintptr_t addr, size_t size) pthread_mutex_lock(&mutex_prot); for (uintptr_t i=idx; i<=end; ++i) { const uint32_t key = (i>>16); - if(memprot[key]!=memprot_default) { + if(memprot[key].prot!=memprot_default) { const uintptr_t start = i&(MEMPROT_SIZE-1); const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1); - uint8_t *block = memprot[key]; - memset(block+start, 0, finish-start+1); + uint8_t *block = memprot[key].prot; + memset(block+start, 0, (finish-start+1)*sizeof(uint8_t)); // blockempty is quite slow, so disable the free of blocks for now #if 0 //def ARM64 //disabled for now, not useful with the mutex if (blockempty(block)) { @@ -1149,8 +1260,13 @@ void freeProtection(uintptr_t addr, size_t size) } #else if(start==0 && finish==MEMPROT_SIZE-1) { - memprot[key] = memprot_default; + memprot[key].prot = memprot_default; box_free(block); + if(memprot[key].hot) { + uint8_t *hot = 
memprot[key].hot; + memprot[key].hot = NULL; + box_free(hot); + } } /*else if(blockempty(block)) { memprot[key] = memprot_default; @@ -1168,7 +1284,7 @@ uint32_t getProtection(uintptr_t addr) if(addr>=(1LL<<48)) return 0; const uintptr_t idx = (addr>>MEMPROT_SHIFT); - uint32_t ret = memprot[idx>>16][idx&0xffff]; + uint32_t ret = memprot[idx>>16].prot[idx&0xffff]; return ret; } @@ -1272,7 +1388,7 @@ void init_custommem_helper(box64context_t* ctx) inited = 1; memset(memprot_default, 0, sizeof(memprot_default)); for(int i=0; i<(1<<MEMPROT_SIZE0); ++i) - memprot[i] = memprot_default; + memprot[i].prot = memprot_default; init_mutexes(); #ifdef DYNAREC if(box64_dynarec) @@ -1328,18 +1444,6 @@ void fini_custommem_helper(box64context_t *ctx) } mmapsize = 0; mmapcap = 0; - dynarec_log(LOG_DEBUG, "Free dynamic Dynarecblocks\n"); - for (uintptr_t idx3=0; idx3<=0xffff; ++idx3) - if(dynmap123[idx3]) { - for (uintptr_t idx2=0; idx2<=0xffff; ++idx2) - if(dynmap123[idx3][idx2]) { - for (uintptr_t idx1=0; idx1<=0xffff; ++idx1) - if(dynmap123[idx3][idx2][idx1]) - FreeDynablockList(&dynmap123[idx3][idx2][idx1]); - box_free(dynmap123[idx3][idx2]); - } - box_free(dynmap123[idx3]); - } box_free(mmaplist); for (int i3=0; i3<(1<<DYNAMAP_SHIFT); ++i3) @@ -1360,9 +1464,12 @@ void fini_custommem_helper(box64context_t *ctx) #endif uint8_t* m; for(int i=0; i<(1<<MEMPROT_SIZE0); ++i) { - m = memprot[i]; + m = memprot[i].prot; if(m!=memprot_default) box_free(m); + m = memprot[i].hot; + if(m) + box_free(m); } for(int i=0; i<n_blocks; ++i) |
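`customMalloc()` now rounds every request up to a minimum `THRESHOLD` (128 bytes minus two block markers) and, when carving a request out of a free chunk would leave a remainder smaller than that threshold, hands out the whole chunk instead, so the allocator stops producing free fragments too small to ever be reused. A hedged sketch of just that sizing policy (the marker size here is a stand-in, and the real chunk bookkeeping is omitted):

```c
#include <stdio.h>
#include <stddef.h>

/* Stand-in for sizeof(blockmark_t); the exact value is not the point here. */
#define MARKER_SIZE   ((size_t)8)
/* Same formula as the diff: never track a chunk smaller than this. */
#define THRESHOLD     (128 - 2 * MARKER_SIZE)

static size_t round_size(size_t size)
{
    if (!size) return size;
    size = (size + 7) & ~(size_t)7;   /* 8-byte alignment, as before */
    if (size < THRESHOLD)
        size = THRESHOLD;             /* new: enforce a minimum useful size */
    return size;
}

/* If carving 'want' bytes out of a free chunk of 'have' bytes would leave a
 * remainder too small to ever satisfy a request, take the whole chunk. */
static size_t grab_size(size_t want, size_t have)
{
    return (have - want < THRESHOLD) ? have : want;
}

int main(void)
{
    printf("round_size(3)   = %zu\n", round_size(3));    /* bumped up to THRESHOLD */
    printf("round_size(200) = %zu\n", round_size(200));  /* big enough already, just aligned */
    printf("ask %zu from a 120-byte chunk -> take %zu\n",
           round_size(100), grab_size(round_size(100), 120));
    printf("ask %zu from a 512-byte chunk -> take %zu\n",
           round_size(100), grab_size(round_size(100), 512));
    return 0;
}
```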
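The new `getDefaultSize()` walks the four-level jump table, 16 bits of the guest address per level, and reports how far a scan can skip when a whole level still points at the shared default table. A stripped-down sketch of the index split and of the "distance to the end of the current default region" arithmetic used in the diff (plain helper functions here, not box64's `box64_jmptbl3` tables):

```c
#include <stdio.h>
#include <stdint.h>

/* Split a 48-bit guest address into the four 16-bit indices used by the
 * jump table (the &0xffff masks visible throughout the diff). */
static void split(uintptr_t addr, uint16_t idx[4])
{
    idx[3] = (addr >> 48) & 0xffff;
    idx[2] = (addr >> 32) & 0xffff;
    idx[1] = (addr >> 16) & 0xffff;
    idx[0] =  addr        & 0xffff;
}

/* Distance from addr to the end of the 2^bits-aligned region it lives in,
 * i.e. how far a scan may skip when that whole level is still "default". */
static uintptr_t skip_to_region_end(uintptr_t addr, int bits)
{
    uintptr_t mask = ((uintptr_t)1 << bits) - 1;
    return ((addr & ~mask) | mask) - addr + 1;
}

int main(void)
{
    uintptr_t addr = 0x00007f123456789aULL;
    uint16_t idx[4];
    split(addr, idx);
    printf("idx3=%04x idx2=%04x idx1=%04x idx0=%04x\n", idx[3], idx[2], idx[1], idx[0]);
    /* If the level-1 table (64K entries) is still default, the scan can jump
     * straight to the next 64K boundary; likewise for the 4G level. */
    printf("skip within 64K region: %lu bytes\n", (unsigned long)skip_to_region_end(addr, 16));
    printf("skip within 4G region : %lu bytes\n", (unsigned long)skip_to_region_end(addr, 32));
    return 0;
}
```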
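With the dynablocklist gone, `cleanDBFromAddressRange()` can no longer ask a per-64K list which blocks overlap a range. Instead it walks addresses one by one, querying `getDB()` at each address, starting `max_db_size` bytes before the range so that a block that begins earlier but overlaps the range is still visited, and using `getDefaultSize()` to leap over address space whose jump table is still at default. A compact sketch of that scan pattern; `lookup()` and `skip()` are hypothetical stand-ins for `getDB()` and `getDefaultSize()`:

```c
#include <stdio.h>
#include <stdint.h>
#include <stddef.h>

typedef struct block_s { uintptr_t entry; size_t size; } block_t;

/* One fake block whose entry point sits just below the cleaned range. */
static block_t the_block = { .entry = 0x0ff0, .size = 0x20 };

/* Hypothetical stand-in for getDB(): block lookup by exact entry address. */
static block_t* lookup(uintptr_t addr)
{
    return (addr == the_block.entry) ? &the_block : NULL;
}

/* Hypothetical stand-in for getDefaultSize(): distance to the next 16-byte
 * boundary, pretending the rest of that slice is known-empty. */
static uintptr_t skip(uintptr_t addr)
{
    return 16 - (addr & 15);
}

static void clean_range(uintptr_t addr, size_t size, size_t max_db_size)
{
    /* Start early enough that a block beginning before 'addr' but still
     * overlapping the range is visited too. */
    uintptr_t start = (addr < max_db_size) ? 0 : addr - max_db_size;
    for (uintptr_t i = start; i < addr + size; ++i) {
        block_t* db = lookup(i);
        if (db) {
            printf("would clean block at %#lx (size %#zx)\n",
                   (unsigned long)db->entry, db->size);
        } else {
            uintptr_t next = skip(i);
            if (next)
                i += next - 1;          /* leap over empty address space */
        }
    }
}

int main(void)
{
    clean_range(0x1000, 0x100, 0x40);   /* scan actually starts at 0x1000 - 0x40 */
    return 0;
}
```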
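The jump-table levels are still populated lazily and without a lock: a thread allocates a fresh sub-table, tries to install it only if the slot still holds the shared default table, and frees its copy if another thread won the race (that is what the `native_lock_storeifref()` calls in the diff do). A sketch of the same publish-or-discard idea using C11 atomics and a plain `int` array as the "sub-table":

```c
#include <stdio.h>
#include <stdlib.h>
#include <stdatomic.h>

#define TBL_SIZE 16

static int default_tbl[TBL_SIZE];                 /* shared read-only default */
static _Atomic(int*) slot = default_tbl;          /* lazily populated level   */

/* Return the live sub-table for 'slot', creating it on first use.  Losing a
 * racing allocation is fine: the loser just frees its copy. */
static int* get_or_create(void)
{
    int* cur = atomic_load(&slot);
    if (cur != default_tbl)
        return cur;
    int* fresh = calloc(TBL_SIZE, sizeof(int));
    int* expected = default_tbl;
    if (atomic_compare_exchange_strong(&slot, &expected, fresh))
        return fresh;       /* we won: our table is now published */
    free(fresh);            /* someone else won first */
    return expected;        /* CAS wrote back the winner's pointer */
}

int main(void)
{
    int* t = get_or_create();
    t[3] = 42;
    printf("slot %s default, t[3]=%d\n",
           (atomic_load(&slot) == default_tbl) ? "still" : "no longer", t[3]);
    return 0;
}
```

box64 implements the same check with its own `native_lock` primitives (included via `dynarec/native_lock.h`) rather than `<stdatomic.h>`, but the publish-or-discard shape is the same.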
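Hot-page tracking now lives next to the protection bytes: each 64K region of the `memprot` array carries an optional array of counters, `AddHotPage()` arms the counter for a page that keeps faulting on self-modified code, and `IsInHotPage()`/`AreaInHotPage()` decrement it on every query until it runs out, letting the dynarec back off from that page for a while instead of re-protecting and re-compiling it over and over. A single-threaded sketch of the counting scheme (fixed-size array instead of the lazily allocated per-region blocks, and no mutex):

```c
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT    12
#define HOTPAGE_STEP  16      /* same initial budget as the diff */
#define NPAGES        1024    /* small fixed map for the sketch  */

static uint8_t hot[NPAGES];   /* remaining "treat as hot" budget per page */

static void add_hot_page(uintptr_t addr)
{
    hot[(addr >> PAGE_SHIFT) % NPAGES] = HOTPAGE_STEP;   /* (re)arm the counter */
}

/* Returns 1 while the page still has budget, consuming one unit per query. */
static int is_in_hot_page(uintptr_t addr)
{
    uint8_t* h = &hot[(addr >> PAGE_SHIFT) % NPAGES];
    if (!*h) return 0;
    *h -= 1;
    return 1;
}

int main(void)
{
    uintptr_t addr = 0x401000;
    add_hot_page(addr);
    int skipped = 0;
    while (is_in_hot_page(addr))
        ++skipped;            /* the dynarec would avoid re-JITting here */
    printf("page treated as hot for %d queries\n", skipped);
    return 0;
}
```

Every hit spends one unit of the `HOTPAGE_STEP` budget, so a page stops being treated as hot after a handful of checks unless it faults again and gets re-armed.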