diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2022-10-31 12:05:04 +0100 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2022-10-31 12:05:04 +0100 |
| commit | 8459ea0d31a851e847318273233c9a8b2b079eae (patch) | |
| tree | 2ab39af3c19d9720d906cd060211107c5dc549a2 /src | |
| parent | 494c245b591cef1a34cc4d4dc52163861d4e54af (diff) | |
| download | box64-8459ea0d31a851e847318273233c9a8b2b079eae.tar.gz box64-8459ea0d31a851e847318273233c9a8b2b079eae.zip | |
Remove dynablocklist (#438)
* [DYNAREC] Removed dynablock sons handling
* [DYNAREC] Removed dynablocklist, and attached dynablocks to the jumptable
* [DYNAREC] Changed handling of HotPage (part of Protection now)
* [DYNAREC] Fixed some performance issues (especially with heavily threaded programs)
Diffstat (limited to 'src')
| -rw-r--r-- | src/custommem.c | 373 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_helper.c | 26 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_jmpnext.c | 11 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_pass2.h | 1 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_pass3.h | 4 | ||||
| -rwxr-xr-x | src/dynarec/arm64/dynarec_arm64_private.h | 7 | ||||
| -rwxr-xr-x | src/dynarec/dynablock.c | 499 | ||||
| -rwxr-xr-x | src/dynarec/dynablock_private.h | 22 | ||||
| -rwxr-xr-x | src/dynarec/dynarec.c | 40 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_native.c | 163 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_native_pass.c | 3 | ||||
| -rw-r--r-- | src/dynarec/dynarec_next.h | 22 | ||||
| -rwxr-xr-x | src/elfs/elfloader.c | 4 | ||||
| -rwxr-xr-x | src/elfs/elfloader_private.h | 4 | ||||
| -rwxr-xr-x | src/include/box64context.h | 2 | ||||
| -rw-r--r-- | src/include/custommem.h | 11 | ||||
| -rwxr-xr-x | src/include/dynablock.h | 19 | ||||
| -rwxr-xr-x | src/include/elfloader.h | 5 | ||||
| -rwxr-xr-x | src/libtools/threads.c | 6 |
19 files changed, 475 insertions, 747 deletions
diff --git a/src/custommem.c b/src/custommem.c index e8abdd86..3264b8ec 100644 --- a/src/custommem.c +++ b/src/custommem.c @@ -26,12 +26,12 @@ #ifdef DYNAREC #include "dynablock.h" #include "dynarec/native_lock.h" +#include "dynarec/dynarec_next.h" //#define USE_MMAP // init inside dynablocks.c KHASH_MAP_INIT_INT64(dynablocks, dynablock_t*) -static dynablocklist_t*** dynmap123[1<<DYNAMAP_SHIFT] = {0}; // 64bits.. in 4x16bits array static mmaplist_t *mmaplist = NULL; static size_t mmapsize = 0; static size_t mmapcap = 0; @@ -60,8 +60,13 @@ static pthread_mutex_t mutex_prot; #endif #define MEMPROT_SIZE (1<<16) #define MEMPROT_SIZE0 (48-MEMPROT_SHIFT2) -static uint8_t *volatile memprot[1<<MEMPROT_SIZE0]; // x86_64 mem is 48bits, page is 12bits, so memory is tracked as [20][16][page protection] -static uint8_t memprot_default[MEMPROT_SIZE]; +typedef struct memprot_s +{ + uint8_t* prot; + uint8_t* hot; +} memprot_t; +static memprot_t memprot[1<<MEMPROT_SIZE0]; // x86_64 mem is 48bits, page is 12bits, so memory is tracked as [20][16][page protection] +static uint8_t memprot_default[MEMPROT_SIZE]; static int inited = 0; typedef struct mapmem_s { @@ -82,6 +87,7 @@ typedef struct blocklist_s { static pthread_mutex_t mutex_blocks; static int n_blocks = 0; // number of blocks for custom malloc +static int c_blocks = 0; // capacity of blocks for custom malloc static blocklist_t* p_blocks = NULL; // actual blocks for custom malloc typedef union mark_s { @@ -273,11 +279,18 @@ static size_t sizeBlock(void* sub) return s->next.size; } +#define THRESHOLD (128-2*sizeof(blockmark_t)) + static size_t roundSize(size_t size) { if(!size) return size; - return (size+7)&~7LL; // 8 bytes align in size + size = (size+7)&~7LL; // 8 bytes align in size + + if(size<THRESHOLD) + size = THRESHOLD; + + return size; } void* customMalloc(size_t size) @@ -285,12 +298,15 @@ void* customMalloc(size_t size) size = roundSize(size); // look for free space void* sub = NULL; + size_t fullsize = 
size+2*sizeof(blockmark_t); pthread_mutex_lock(&mutex_blocks); for(int i=0; i<n_blocks; ++i) { if(p_blocks[i].maxfree>=size) { size_t rsize = 0; - sub = getFirstBlock(p_blocks[i].first, size, &rsize, NULL); + sub = getFirstBlock(p_blocks[i].block, size, &rsize, p_blocks[i].first); if(sub) { + if(rsize-size<THRESHOLD) + size = rsize; void* ret = allocBlock(p_blocks[i].block, sub, size, NULL); if(sub==p_blocks[i].first) p_blocks[i].first = getNextFreeBlock(sub); @@ -303,10 +319,11 @@ void* customMalloc(size_t size) } // add a new block int i = n_blocks++; - p_blocks = (blocklist_t*)box_realloc(p_blocks, n_blocks*sizeof(blocklist_t)); - size_t allocsize = MMAPSIZE; - if(size+2*sizeof(blockmark_t)>allocsize) - allocsize = size+2*sizeof(blockmark_t); + if(n_blocks>c_blocks) { + c_blocks += 4; + p_blocks = (blocklist_t*)box_realloc(p_blocks, c_blocks*sizeof(blocklist_t)); + } + size_t allocsize = (fullsize>MMAPSIZE)?fullsize:MMAPSIZE; #ifdef USE_MMAP void* p = mmap(NULL, allocsize, PROT_READ|PROT_WRITE, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); memset(p, 0, allocsize); @@ -610,18 +627,29 @@ void FreeDynarecMap(dynablock_t* db, uintptr_t addr, size_t size) ActuallyFreeDynarecMap(db, addr, size); } -dynablocklist_t* getDB(uintptr_t idx) +dynablock_t* getDB(uintptr_t addr) { - // already 16bits shifted - uintptr_t idx3 = (idx>>32)&((1<<DYNAMAP_SHIFT)-1); - uintptr_t idx2 = (idx>>16)&((1<<DYNAMAP_SHIFT)-1); - uintptr_t idx1 = (idx )&((1<<DYNAMAP_SHIFT)-1); - - if(!dynmap123[idx3]) - return NULL; - if(!dynmap123[idx3][idx2]) + if(isJumpTableDefault64((void*)addr)) return NULL; - return dynmap123[idx3][idx2][idx1]; + uintptr_t ret = getJumpAddress64(addr); + return *(dynablock_t**)(ret-sizeof(void*)); +} +uintptr_t getDefaultSize(uintptr_t addr) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = (((uintptr_t)addr)>>48)&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) + return (addr&~((1LL<<48)-1)|0xffffffffffffLL)-addr + 1; + idx2 = (((uintptr_t)addr)>>32)&0xffff; + 
if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) + return (addr&~((1LL<<32)-1)|0xffffffffLL)-addr + 1; + idx1 = (((uintptr_t)addr)>>16)&0xffff; + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) + return (addr&~((1LL<<16)-1)|0xffffLL)-addr + 1; + idx0 = addr&0xffff; + while(idx0<0x10000 && box64_jmptbl3[idx3][idx2][idx1][idx0]==(uintptr_t)native_next) + ++idx0; + return idx0 - (addr&0xffff); } // each dynmap is 64k of size @@ -629,87 +657,29 @@ dynablocklist_t* getDB(uintptr_t idx) void addDBFromAddressRange(uintptr_t addr, size_t size) { dynarec_log(LOG_DEBUG, "addDBFromAddressRange %p -> %p\n", (void*)addr, (void*)(addr+size-1)); - uintptr_t idx = (addr>>DYNAMAP_SHIFT); - uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT); - for (uintptr_t i=idx; i<=end; ++i) { - int idx3 = (i>>32)&((1<<DYNAMAP_SHIFT)-1); - int idx2 = (i>>16)&((1<<DYNAMAP_SHIFT)-1); - int idx1 = (i )&((1<<DYNAMAP_SHIFT)-1); - if(!dynmap123[idx3]) { - dynablocklist_t*** p = (dynablocklist_t***)box_calloc(1<<DYNAMAP_SHIFT, sizeof(dynablocklist_t**)); - if(native_lock_storeifnull(&dynmap123[idx3], p)!=p) - box_free(p); - } - if(!dynmap123[idx3][idx2]) { - dynablocklist_t** p = (dynablocklist_t**)box_calloc(1<<DYNAMAP_SHIFT, sizeof(dynablocklist_t*)); - if(native_lock_storeifnull(&dynmap123[idx3][idx2], p)!=p) - box_free(p); - } - if(!dynmap123[idx3][idx2][idx1]) { - dynablocklist_t* p = NewDynablockList(i<<DYNAMAP_SHIFT, 1<<DYNAMAP_SHIFT, 0); - if(native_lock_storeifnull(&dynmap123[idx3][idx2][idx1], p)!=p) - FreeDynablockList(&p); - } - } -} - -static int dynmapempty(void** mem) -{ - for (int i=0; i<(1<<DYNAMAP_SHIFT); ++i) - if(mem[i]) - return 0; - return 1; + // do nothing, dynablock are allowed based on memory protection flags } void cleanDBFromAddressRange(uintptr_t addr, size_t size, int destroy) { - dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p -> %p %s\n", (void*)addr, (void*)(addr+size-1), destroy?"destroy":"mark"); - uintptr_t idx = (addr>>DYNAMAP_SHIFT); - uintptr_t end 
= ((addr+size-1)>>DYNAMAP_SHIFT); - for (uintptr_t i=idx; i<=end; ++i) { - int idx3 = (i>>32)&((1<<DYNAMAP_SHIFT)-1); - int idx2 = (i>>16)&((1<<DYNAMAP_SHIFT)-1); - int idx1 = (i )&((1<<DYNAMAP_SHIFT)-1); - if(dynmap123[idx3] && dynmap123[idx3][idx2]) { - dynablocklist_t* dblist = dynmap123[idx3][idx2][idx1]; - if(dblist) { - if(destroy) { - if(FreeRangeDynablock(dblist, addr, size) && 0) { // dblist is empty, check if we can delete more... - // disabling this for now. It seems to cause random crash in Terraria - if(!native_lock_storeifref(&dynmap123[idx3][idx2][idx1], NULL, dblist)) { - dynablocklist_t** p = dynmap123[idx3][idx2]; - if(dynmapempty((void**)p)) { - if(!native_lock_storeifref(&dynmap123[idx3][idx2], NULL, p)) { - dynablocklist_t*** p2 = dynmap123[idx3]; - if(dynmapempty((void**)p2)) { - if(!native_lock_storeifref(&dynmap123[idx3], NULL, p2)) { - box_free(p2); - } - } - box_free(p); - } - } - FreeDynablockList(&dblist); - } - } - } else - MarkRangeDynablock(dblist, addr, size); - } + uintptr_t start_addr = my_context?((addr<my_context->max_db_size)?0:(addr-my_context->max_db_size)):addr; + dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p/%p -> %p %s\n", (void*)addr, (void*)start_addr, (void*)(addr+size-1), destroy?"destroy":"mark"); + for (uintptr_t i=start_addr; i<addr+size; ++i) { + dynablock_t* db = getDB(i); + if(db) { + if(destroy) + FreeRangeDynablock(db, addr, size); + else + MarkRangeDynablock(db, addr, size); + } else { + uintptr_t next = getDefaultSize(i); + if(next) + i+=next-1; } } } -#ifdef ARM64 -void arm64_next(void); -#define native_next arm64_next -#elif defined(LA464) -void la464_next(void); -#define native_next la464_next -#else -#error Unsupported architecture -#endif - -void addJumpTableIfDefault64(void* addr, void* jmp) +int addJumpTableIfDefault64(void* addr, void* jmp) { uintptr_t idx3, idx2, idx1, idx0; idx3 = (((uintptr_t)addr)>>48)&0xffff; @@ -738,7 +708,7 @@ void addJumpTableIfDefault64(void* addr, void* jmp) 
box_free(tbl); } - native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], jmp, native_next); + return (native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], jmp, native_next)==jmp)?1:0; } void setJumpTableDefault64(void* addr) { @@ -755,6 +725,51 @@ void setJumpTableDefault64(void* addr) idx0 = (((uintptr_t)addr) )&0xffff; box64_jmptbl3[idx3][idx2][idx1][idx0] = (uintptr_t)native_next; } +void setJumpTableDefaultRef64(void* addr, void* jmp) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = (((uintptr_t)addr)>>48)&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) + return; + idx2 = (((uintptr_t)addr)>>32)&0xffff; + if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) + return; + idx1 = (((uintptr_t)addr)>>16)&0xffff; + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) + return; + idx0 = (((uintptr_t)addr) )&0xffff; + native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], native_next, jmp); +} +int setJumpTableIfRef64(void* addr, void* jmp, void* ref) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = (((uintptr_t)addr)>>48)&0xffff; + idx2 = (((uintptr_t)addr)>>32)&0xffff; + idx1 = (((uintptr_t)addr)>>16)&0xffff; + idx0 = (((uintptr_t)addr) )&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { + uintptr_t*** tbl = (uintptr_t***)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t**)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault1; + if(native_lock_storeifref(&box64_jmptbl3[idx3], tbl, box64_jmptbldefault2)!=tbl) + box_free(tbl); + } + if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) { + uintptr_t** tbl = (uintptr_t**)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t*)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault0; + if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2], tbl, box64_jmptbldefault1)!=tbl) + box_free(tbl); + } + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) { + uintptr_t* tbl = (uintptr_t*)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t)); + 
for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = (uintptr_t)native_next; + if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1], tbl, box64_jmptbldefault0)!=tbl) + box_free(tbl); + } + return (native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1][idx0], jmp, ref)==jmp)?1:0; +} int isJumpTableDefault64(void* addr) { uintptr_t idx3, idx2, idx1, idx0; @@ -807,6 +822,38 @@ uintptr_t getJumpTableAddress64(uintptr_t addr) return (uintptr_t)&box64_jmptbl3[idx3][idx2][idx1][idx0]; } +uintptr_t getJumpAddress64(uintptr_t addr) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = ((addr)>>48)&0xffff; + idx2 = ((addr)>>32)&0xffff; + idx1 = ((addr)>>16)&0xffff; + idx0 = ((addr) )&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { + uintptr_t*** tbl = (uintptr_t***)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t**)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault1; + if(native_lock_storeifref(&box64_jmptbl3[idx3], tbl, box64_jmptbldefault2)!=tbl) + box_free(tbl); + } + if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) { + uintptr_t** tbl = (uintptr_t**)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t*)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault0; + if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2], tbl, box64_jmptbldefault1)!=tbl) + box_free(tbl); + } + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) { + uintptr_t* tbl = (uintptr_t*)box_malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = (uintptr_t)native_next; + if(native_lock_storeifref(&box64_jmptbl3[idx3][idx2][idx1], tbl, box64_jmptbldefault0)!=tbl) + box_free(tbl); + } + + return (uintptr_t)box64_jmptbl3[idx3][idx2][idx1][idx0]; +} + // Remove the Write flag from an adress range, so DB can be executed safely void protectDB(uintptr_t addr, uintptr_t size) { @@ -820,24 +867,24 @@ void protectDB(uintptr_t addr, uintptr_t size) pthread_mutex_lock(&mutex_prot); int ret; for (uintptr_t 
i=(idx>>16); i<=(end>>16); ++i) - if(memprot[i]==memprot_default) { + if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); /*if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { box_free(newblock); }*/ - memprot[i] = newblock; + memprot[i].prot = newblock; } for (uintptr_t i=idx; i<=end; ++i) { - uint32_t prot = memprot[i>>16][i&0xffff]; + uint32_t prot = memprot[i>>16].prot[i&0xffff]; uint32_t dyn = prot&PROT_CUSTOM; prot&=~PROT_CUSTOM; if(!prot) prot = PROT_READ | PROT_WRITE | PROT_EXEC; // comes from malloc & co, so should not be able to execute if((prot&PROT_WRITE)) { if(!dyn) mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE); - memprot[i>>16][i&0xffff] = prot|PROT_DYNAREC; // need to use atomic exchange? + memprot[i>>16].prot[i&0xffff] = prot|PROT_DYNAREC; // need to use atomic exchange? } else - memprot[i>>16][i&0xffff] = prot|PROT_DYNAREC_R; + memprot[i>>16].prot[i&0xffff] = prot|PROT_DYNAREC_R; } pthread_mutex_unlock(&mutex_prot); } @@ -855,23 +902,23 @@ void unprotectDB(uintptr_t addr, size_t size, int mark) return; pthread_mutex_lock(&mutex_prot); for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) - if(memprot[i]==memprot_default) { + if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); /*if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { box_free(newblock); }*/ - memprot[i] = newblock; + memprot[i].prot = newblock; } for (uintptr_t i=idx; i<=end; ++i) { - uint32_t prot = memprot[i>>16][i&0xffff]; + uint32_t prot = memprot[i>>16].prot[i&0xffff]; if(prot&PROT_DYNAREC) { prot&=~PROT_CUSTOM; if(mark) cleanDBFromAddressRange((i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, 0); mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot); - memprot[i>>16][i&0xffff] = prot; // need to use atomic exchange? + memprot[i>>16].prot[i&0xffff] = prot; // need to use atomic exchange? 
} else if(prot&PROT_DYNAREC_R) - memprot[i>>16][i&0xffff] = prot&~PROT_CUSTOM; + memprot[i>>16].prot[i&0xffff] = prot&~PROT_CUSTOM; } pthread_mutex_unlock(&mutex_prot); } @@ -888,7 +935,7 @@ int isprotectedDB(uintptr_t addr, size_t size) return 0; } for (uintptr_t i=idx; i<=end; ++i) { - uint32_t prot = memprot[i>>16][i&0xffff]; + uint32_t prot = memprot[i>>16].prot[i&0xffff]; if(!(prot&PROT_DYNAREC || prot&PROT_DYNAREC_R)) { dynarec_log(LOG_DEBUG, "0\n"); return 0; @@ -1002,25 +1049,25 @@ void updateProtection(uintptr_t addr, size_t size, uint32_t prot) return; pthread_mutex_lock(&mutex_prot); for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) - if(memprot[i]==memprot_default) { + if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); #if 0 //def ARM64 //disabled for now, not usefull with the mutex if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { box_free(newblock); } #else - memprot[i] = newblock; + memprot[i].prot = newblock; #endif } for (uintptr_t i=idx; i<=end; ++i) { - uint32_t dyn=(memprot[i>>16][i&0xffff]&(PROT_DYNAREC | PROT_DYNAREC_R)); + uint32_t dyn=(memprot[i>>16].prot[i&0xffff]&(PROT_DYNAREC | PROT_DYNAREC_R)); if(dyn && (prot&PROT_WRITE)) { // need to remove the write protection from this block dyn = PROT_DYNAREC; mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE); } else if(dyn && !(prot&PROT_WRITE)) { dyn = PROT_DYNAREC_R; } - memprot[i>>16][i&0xffff] = prot|dyn; + memprot[i>>16].prot[i&0xffff] = prot|dyn; } pthread_mutex_unlock(&mutex_prot); } @@ -1036,18 +1083,18 @@ void setProtection(uintptr_t addr, size_t size, uint32_t prot) return; pthread_mutex_lock(&mutex_prot); for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) - if(memprot[i]==memprot_default) { + if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(MEMPROT_SIZE, sizeof(uint8_t)); #if 0 //def ARM64 //disabled for now, not usefull with the mutex if (native_lock_storeifref(&memprot[i], 
newblock, memprot_default) != newblock) { box_free(newblock); } #else - memprot[i] = newblock; + memprot[i].prot = newblock; #endif } for (uintptr_t i=idx; i<=end; ++i) - memprot[i>>16][i&0xffff] = prot; + memprot[i>>16].prot[i&0xffff] = prot; pthread_mutex_unlock(&mutex_prot); } @@ -1063,20 +1110,20 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot) return; pthread_mutex_lock(&mutex_prot); for (uintptr_t i=(idx>>16); i<=(end>>16); ++i) - if(memprot[i]==memprot_default) { + if(memprot[i].prot==memprot_default) { uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); #if 0 //def ARM64 //disabled for now, not usefull with the mutex if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { box_free(newblock); } #else - memprot[i] = newblock; + memprot[i].prot = newblock; #endif } for (uintptr_t i=idx; i<=end; ++i) { const uintptr_t start = i&(MEMPROT_SIZE-1); const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1); - uint8_t* block = memprot[i>>16]; + uint8_t* block = memprot[i>>16].prot; for(uintptr_t ii = start; ii<=finish; ++ii) { if(!block[ii]) block[ii] = prot; @@ -1086,6 +1133,70 @@ void allocProtection(uintptr_t addr, size_t size, uint32_t prot) pthread_mutex_unlock(&mutex_prot); } +#ifdef DYNAREC +#define HOTPAGE_STEP 16 +int IsInHotPage(uintptr_t addr) { + if(addr<=(1LL<<48)) + return 0; + int idx = (addr>>MEMPROT_SHIFT)>>16; + if(!memprot[idx].hot) + return 0; + int base = (addr>>MEMPROT_SHIFT)&0xffff; + if(!memprot[idx].hot[base]) + return 0; + // decrement hot + pthread_mutex_lock(&mutex_prot); + memprot[idx].hot[base]-=1; + pthread_mutex_unlock(&mutex_prot); + return 1; +} + +int AreaInHotPage(uintptr_t start, uintptr_t end_) { + //dynarec_log(LOG_DEBUG, "AreaInHotPage %p -> %p => ", (void*)start, (void*)end_); + uintptr_t idx = (start>>MEMPROT_SHIFT); + uintptr_t end = (end_>>MEMPROT_SHIFT); + if(end>=(1LL<<(48-MEMPROT_SHIFT))) + end = (1LL<<(48-MEMPROT_SHIFT))-1LL; + 
if(end<idx) { // memory addresses higher than 48bits are not tracked + //dynarec_log(LOG_DEBUG, "00\n"); + return 0; + } + for (uintptr_t i=idx; i<=end; ++i) { + uint8_t *block = memprot[i>>16].hot; + uint32_t hot = block?block[i&0xffff]:0; + if(hot) { + // decrement hot + pthread_mutex_lock(&mutex_prot); + block[i&0xffff]-=1; + pthread_mutex_unlock(&mutex_prot); + //dynarec_log(LOG_DEBUG, "1\n"); + return 1; + } + } + //dynarec_log(LOG_DEBUG, "0\n"); + return 0; + +} + +void AddHotPage(uintptr_t addr) { + int idx = (addr>>MEMPROT_SHIFT)>>16; + int base = (addr>>MEMPROT_SHIFT)&0xffff; + pthread_mutex_lock(&mutex_prot); + if(!memprot[idx].hot) { + uint8_t* newblock = box_calloc(1<<16, sizeof(uint8_t)); +#if 0 //def ARM64 //disabled for now, not usefull with the mutex + if (native_lock_storeifref(&memprot[i], newblock, memprot_default) != newblock) { + box_free(newblock); + } +#else + memprot[idx].hot = newblock; +#endif + } + memprot[idx].hot[base] = HOTPAGE_STEP; + pthread_mutex_unlock(&mutex_prot); +} +#endif + void loadProtectionFromMap() { if(box64_mapclean) @@ -1130,11 +1241,11 @@ void freeProtection(uintptr_t addr, size_t size) pthread_mutex_lock(&mutex_prot); for (uintptr_t i=idx; i<=end; ++i) { const uint32_t key = (i>>16); - if(memprot[key]!=memprot_default) { + if(memprot[key].prot!=memprot_default) { const uintptr_t start = i&(MEMPROT_SIZE-1); const uintptr_t finish = (((i|(MEMPROT_SIZE-1))<end)?(MEMPROT_SIZE-1):end)&(MEMPROT_SIZE-1); - uint8_t *block = memprot[key]; - memset(block+start, 0, finish-start+1); + uint8_t *block = memprot[key].prot; + memset(block+start, 0, (finish-start+1)*sizeof(uint8_t)); // blockempty is quite slow, so disable the free of blocks for now #if 0 //def ARM64 //disabled for now, not useful with the mutex if (blockempty(block)) { @@ -1149,8 +1260,13 @@ void freeProtection(uintptr_t addr, size_t size) } #else if(start==0 && finish==MEMPROT_SIZE-1) { - memprot[key] = memprot_default; + memprot[key].prot = memprot_default; 
box_free(block); + if(memprot[key].hot) { + uint8_t *hot = memprot[key].hot; + memprot[key].hot = NULL; + box_free(hot); + } } /*else if(blockempty(block)) { memprot[key] = memprot_default; @@ -1168,7 +1284,7 @@ uint32_t getProtection(uintptr_t addr) if(addr>=(1LL<<48)) return 0; const uintptr_t idx = (addr>>MEMPROT_SHIFT); - uint32_t ret = memprot[idx>>16][idx&0xffff]; + uint32_t ret = memprot[idx>>16].prot[idx&0xffff]; return ret; } @@ -1272,7 +1388,7 @@ void init_custommem_helper(box64context_t* ctx) inited = 1; memset(memprot_default, 0, sizeof(memprot_default)); for(int i=0; i<(1<<MEMPROT_SIZE0); ++i) - memprot[i] = memprot_default; + memprot[i].prot = memprot_default; init_mutexes(); #ifdef DYNAREC if(box64_dynarec) @@ -1328,18 +1444,6 @@ void fini_custommem_helper(box64context_t *ctx) } mmapsize = 0; mmapcap = 0; - dynarec_log(LOG_DEBUG, "Free dynamic Dynarecblocks\n"); - for (uintptr_t idx3=0; idx3<=0xffff; ++idx3) - if(dynmap123[idx3]) { - for (uintptr_t idx2=0; idx2<=0xffff; ++idx2) - if(dynmap123[idx3][idx2]) { - for (uintptr_t idx1=0; idx1<=0xffff; ++idx1) - if(dynmap123[idx3][idx2][idx1]) - FreeDynablockList(&dynmap123[idx3][idx2][idx1]); - box_free(dynmap123[idx3][idx2]); - } - box_free(dynmap123[idx3]); - } box_free(mmaplist); for (int i3=0; i3<(1<<DYNAMAP_SHIFT); ++i3) @@ -1360,9 +1464,12 @@ void fini_custommem_helper(box64context_t *ctx) #endif uint8_t* m; for(int i=0; i<(1<<MEMPROT_SIZE0); ++i) { - m = memprot[i]; + m = memprot[i].prot; if(m!=memprot_default) box_free(m); + m = memprot[i].hot; + if(m) + box_free(m); } for(int i=0; i<n_blocks; ++i) diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index 521b0604..d38c8078 100755 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -403,30 +403,30 @@ void jump_to_next(dynarec_arm_t* dyn, uintptr_t ip, int reg, int ninst) } uintptr_t tbl = getJumpTable64(); MAYUSE(tbl); - TABLE64(x2, tbl); - UBFXx(x3, xRIP, 48, 
JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 32, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 16, JMPTABL_SHIFT); - LDRx_REG_LSL3(x2, x2, x3); - UBFXx(x3, xRIP, 0, JMPTABL_SHIFT); - LDRx_REG_LSL3(x3, x2, x3); + TABLE64(x3, tbl); + UBFXx(x2, xRIP, 48, JMPTABL_SHIFT); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, 32, JMPTABL_SHIFT); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, 16, JMPTABL_SHIFT); + LDRx_REG_LSL3(x3, x3, x2); + UBFXx(x2, xRIP, 0, JMPTABL_SHIFT); + LDRx_REG_LSL3(x2, x3, x2); } else { uintptr_t p = getJumpTableAddress64(ip); MAYUSE(p); - TABLE64(x2, p); + TABLE64(x3, p); GETIP_(ip); - LDRx_U12(x3, x2, 0); + LDRx_U12(x2, x3, 0); } if(reg!=x1) { MOVx_REG(x1, xRIP); } CLEARIP(); #ifdef HAVE_TRACE - //MOVx(x2, 15); no access to PC reg + //MOVx(x3, 15); no access to PC reg #endif - BLR(x3); // save LR... + BLR(x2); // save LR... } void ret_to_epilog(dynarec_arm_t* dyn, int ninst) diff --git a/src/dynarec/arm64/dynarec_arm64_jmpnext.c b/src/dynarec/arm64/dynarec_arm64_jmpnext.c new file mode 100644 index 00000000..c40dfb1e --- /dev/null +++ b/src/dynarec/arm64/dynarec_arm64_jmpnext.c @@ -0,0 +1,11 @@ +#include <stdint.h> + +#include "arm64_emitter.h" + +#define EMIT(A) *block = (A); ++block +void CreateJmpNext(void* addr, void* next) +{ + uint32_t* block = (uint32_t*)addr; + LDRx_literal(x2, (intptr_t)next - (intptr_t)addr); + BR(x2); +} \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_pass2.h b/src/dynarec/arm64/dynarec_arm64_pass2.h index 4a5122c2..29d5f01a 100755 --- a/src/dynarec/arm64/dynarec_arm64_pass2.h +++ b/src/dynarec/arm64/dynarec_arm64_pass2.h @@ -8,7 +8,6 @@ dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size); \ if(ninst && isInstClean(dyn, ninst)) { \ if(dyn->last_ip!=ip) dyn->last_ip = 0; \ - ++dyn->sons_size; \ } \ } #define INST_EPILOG dyn->insts[ninst].epilog = dyn->native_size; diff --git a/src/dynarec/arm64/dynarec_arm64_pass3.h 
b/src/dynarec/arm64/dynarec_arm64_pass3.h index d1d8fbba..2a003711 100755 --- a/src/dynarec/arm64/dynarec_arm64_pass3.h +++ b/src/dynarec/arm64/dynarec_arm64_pass3.h @@ -10,10 +10,6 @@ #define NEW_INST \ if(ninst && isInstClean(dyn, ninst)) { \ if(dyn->last_ip!=ip) dyn->last_ip = 0; \ - dyn->sons_x64[dyn->sons_size] = (uintptr_t)ip; \ - dyn->sons_native[dyn->sons_size] = dyn->block; \ - MESSAGE(LOG_DUMP, "----> potential Son here %p/%p\n", (void*)ip, dyn->block); \ - ++dyn->sons_size; \ } #define INST_EPILOG #define INST_NAME(name) \ diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 23fe4af5..1cac7d3e 100755 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -5,6 +5,7 @@ typedef struct x64emu_s x64emu_t; typedef struct dynablock_s dynablock_t; +typedef struct instsize_s instsize_t; #define BARRIER_MAYBE 8 @@ -99,11 +100,9 @@ typedef struct dynarec_arm_s { uintptr_t* next; // variable array of "next" jump address int next_sz; int next_cap; - uintptr_t* sons_x64; // the x64 address of potential dynablock sons - void** sons_native; // the arm address of potential dynablock sons - int sons_size; // number of potential dynablock sons int* predecessor;// single array of all predecessor dynablock_t* dynablock; + instsize_t* instsize; } dynarec_arm_t; void add_next(dynarec_arm_t *dyn, uintptr_t addr); @@ -113,6 +112,8 @@ int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n); int Table64(dynarec_arm_t *dyn, uint64_t val); // add a value to etable64 (if needed) and gives back the imm19 to use in LDR_literal +void CreateJmpNext(void* addr, void* next); + #define GO_TRACE() \ GETIP_(ip); \ MOVx_REG(x1, xRIP); \ diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c index 43222a2e..b795c3c1 100755 --- a/src/dynarec/dynablock.c +++ b/src/dynarec/dynablock.c @@ -3,6 +3,7 @@ #include <pthread.h> #include <errno.h> #include <setjmp.h> +#include <sys/mman.h> 
#include "debug.h" #include "box64context.h" @@ -37,111 +38,37 @@ uint32_t X31_hash_code(void* addr, int len) return (uint32_t)h; } -dynablocklist_t* NewDynablockList(uintptr_t text, int textsz, int direct) -{ - if(!textsz) { - printf_log(LOG_NONE, "Error, creating a NULL sized Dynablock\n"); - return NULL; - } - dynablocklist_t* ret = (dynablocklist_t*)customCalloc(1, sizeof(dynablocklist_t)); - ret->text = text; - ret->textsz = textsz; - ret->minstart = text; - ret->maxend = text+textsz-1; - if(direct && textsz) { - ret->direct = (dynablock_t**)customCalloc(textsz, sizeof(dynablock_t*)); - if(!ret->direct) {printf_log(LOG_NONE, "Warning, fail to create direct block for dynablock @%p\n", (void*)text);} - } - dynarec_log(LOG_DEBUG, "New Dynablocklist %p, from %p->%p\n", ret, (void*)text, (void*)(text+textsz)); - return ret; -} - void FreeDynablock(dynablock_t* db, int need_lock) { if(db) { if(db->gone) return; // already in the process of deletion! - dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p parent=%p, father=%p, with %d son(s) already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size-1, db->parent, db->father, db->sons_size, db->gone); + dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size-1, db->gone); if(need_lock) pthread_mutex_lock(&my_context->mutex_dyndump); - db->done = 0; - db->gone = 1; - // remove from direct if there - uintptr_t startdb = db->parent->text; - uintptr_t enddb = db->parent->text + db->parent->textsz; - if(db->parent->direct) { - uintptr_t addr = (uintptr_t)db->x64_addr; - if(addr>=startdb && addr<enddb) - native_lock_xchg(&db->parent->direct[addr-startdb], 0); // secured write - } // remove jumptable setJumpTableDefault64(db->x64_addr); - // remove and free the sons - for (int i=0; i<db->sons_size; ++i) { - dynablock_t *son = (dynablock_t*)native_lock_xchg(&db->sons[i], 0); - FreeDynablock(son, 0); - } - // only 
the father free the DynarecMap - if(!db->father) { - dynarec_log(LOG_DEBUG, " -- FreeDyrecMap(%p, %d)\n", db->block, db->size); - FreeDynarecMap(db, (uintptr_t)db->block, db->size); - customFree(db->sons); - customFree(db->instsize); - } + dynarec_log(LOG_DEBUG, " -- FreeDyrecMap(%p, %d)\n", db->actual_block, db->size); + db->done = 0; + db->gone = 1; + FreeDynarecMap(db, (uintptr_t)db->actual_block, db->size); customFree(db); if(need_lock) pthread_mutex_unlock(&my_context->mutex_dyndump); } } -void FreeDynablockList(dynablocklist_t** dynablocks) -{ - if(!dynablocks) - return; - if(!*dynablocks) - return; - dynarec_log(LOG_DEBUG, "Free Dynablocklist %p, with Direct Blocks %p\n", *dynablocks, (*dynablocks)->direct); - if((*dynablocks)->direct) { - for (int i=0; i<(*dynablocks)->textsz; ++i) { - if((*dynablocks)->direct[i] && !(*dynablocks)->direct[i]->father) - FreeDynablock((*dynablocks)->direct[i], 1); - } - customFree((*dynablocks)->direct); - } - (*dynablocks)->direct = NULL; - - customFree(*dynablocks); - *dynablocks = NULL; -} - void MarkDynablock(dynablock_t* db) { if(db) { - if(db->father) - db = db->father; // mark only father if(db->need_test) return; // already done - dynarec_log(LOG_DEBUG, "MarkDynablock %p with %d son(s) %p-%p\n", db, db->sons_size, db->x64_addr, db->x64_addr+db->x64_size-1); + dynarec_log(LOG_DEBUG, "MarkDynablock %p %p-%p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1); db->need_test = 1; - setJumpTableDefault64(db->x64_addr); - for(int i=0; i<db->sons_size; ++i) - setJumpTableDefault64(db->sons[i]->x64_addr); + setJumpTableIfRef64(db->x64_addr, db->jmpnext, db->block); } } -uintptr_t StartDynablockList(dynablocklist_t* db) -{ - if(db) - return db->text; - return 0; -} -uintptr_t EndDynablockList(dynablocklist_t* db) -{ - if(db) - return db->text+db->textsz-1; - return 0; -} - int IntervalIntersects(uintptr_t start1, uintptr_t end1, uintptr_t start2, uintptr_t end2) { if(start1 > end2 || start2 > end1) @@ -152,90 +79,35 @@ int 
IntervalIntersects(uintptr_t start1, uintptr_t end1, uintptr_t start2, uintp static int MarkedDynablock(dynablock_t* db) { if(db) { - if(db->father) - db = db->father; // mark only father if(db->need_test) return 1; // already done } return 0; } -void MarkDirectDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size) +void MarkRangeDynablock(dynablock_t* db, uintptr_t addr, uintptr_t size) { // Mark will try to find *any* blocks that intersect the range to mark - if(!dynablocks) - return; - if(!dynablocks->direct) + if(!db) return; - uintptr_t startdb = dynablocks->text; - uintptr_t sizedb = dynablocks->textsz; - dynablock_t *db; - dynarec_log(LOG_DEBUG, "MarkDirectDynablock %p-%p .. startdb=%p, sizedb=%p\n", (void*)addr, (void*)addr+size-1, (void*)startdb, (void*)sizedb); - for(uintptr_t i = 0; i<sizedb; ++i) - if((db=dynablocks->direct[i]) && !MarkedDynablock(db)) - if(IntervalIntersects((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr+db->x64_size-1, addr, addr+size+1)) - MarkDynablock(db); + dynarec_log(LOG_DEBUG, "MarkRangeDynablock %p-%p .. 
startdb=%p, sizedb=%p\n", (void*)addr, (void*)addr+size-1, (void*)db->x64_addr, (void*)db->x64_size); + if(!MarkedDynablock(db)) + if(IntervalIntersects((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr+db->x64_size-1, addr, addr+size+1)) + MarkDynablock(db); } -int FreeRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size) +int FreeRangeDynablock(dynablock_t* db, uintptr_t addr, uintptr_t size) { - if(!dynablocks) + if(!db) return 1; - if(dynablocks->direct) { - int need_lock = my_context?1:0; - dynablock_t* db; - int ret; - khint_t k; - kh_dynablocks_t *blocks = kh_init(dynablocks); - // copy in a temporary list - uintptr_t startdb = dynablocks->text; - uintptr_t enddb = startdb + dynablocks->textsz; - uintptr_t start = addr; - uintptr_t end = addr+size; - if(start<startdb) - start = startdb; - if(end>enddb) - end = enddb; - if(end>startdb && start<enddb) - for(uintptr_t i = start; i<end; ++i) { - db = (dynablock_t*)native_lock_xchg(&dynablocks->direct[i-startdb], 0); - if(db) { - if(db->father) - db = db->father; - if(db->parent==dynablocks) { - k = kh_put(dynablocks, blocks, (uintptr_t)db, &ret); - kh_value(blocks, k) = db; - } - } - } - // purge the list - kh_foreach_value(blocks, db, - FreeDynablock(db, need_lock); - ); - kh_destroy(dynablocks, blocks); - // check emptyness - for(uintptr_t i=0; i<dynablocks->textsz; ++i) - if(dynablocks->direct[i]) - return 0; - return 1; + int need_lock = my_context?1:0; + if(IntervalIntersects((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr+db->x64_size-1, addr, addr+size+1)) { + FreeDynablock(db, need_lock); + return 0; } return 1; } -void MarkRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size) -{ - if(!dynablocks) - return; - dynarec_log(LOG_DEBUG, "MarkRangeDynablock %p-%p\n", (void*)addr, (void*)addr+size-1); - if(dynablocks->direct) { - uintptr_t new_addr = dynablocks->minstart; - uintptr_t new_size = dynablocks->maxend - new_addr + 1; - MarkDirectDynablock(dynablocks, addr, 
size); - // the blocks check before - for(unsigned idx=(new_addr)>>DYNAMAP_SHIFT; idx<(addr>>DYNAMAP_SHIFT); ++idx) - MarkDirectDynablock(getDB(idx), addr, size); - } -} dynablock_t* FindDynablockDynablocklist(void* addr, kh_dynablocks_t* dynablocks) { @@ -246,63 +118,30 @@ dynablock_t* FindDynablockDynablocklist(void* addr, kh_dynablocks_t* dynablocks) const uintptr_t s = (uintptr_t)db->block; const uintptr_t e = (uintptr_t)db->block+db->size; if((uintptr_t)addr>=s && (uintptr_t)addr<e) - return db->father?db->father:db; + return db; ) return NULL; } -static dynablocklist_t* getDBFromAddress(uintptr_t addr) -{ - const uintptr_t idx = (addr>>DYNAMAP_SHIFT); - return getDB(idx); -} - -dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* created) +dynablock_t *AddNewDynablock(uintptr_t addr) { - if(!dynablocks) { - dynarec_log(LOG_INFO, "Warning: Ask to create a dynablock with a NULL dynablocklist (addr=%p)\n", (void*)addr); - *created = 0; + dynablock_t* block; + #if 0 + // check if memory as the correct flags + int prot = getProtection(addr); + if(!(prot&(PROT_EXEC|PROT_DYNAREC|PROT_DYNAREC_R))) { + dynarec_log(LOG_VERBOSE, "Block asked on a memory with no execution flags 0x%02X\n", prot); return NULL; } - if((addr<dynablocks->text) || (addr>=(dynablocks->text+dynablocks->textsz))) { - return AddNewDynablock(getDBFromAddress(addr), addr, created); - } - dynablock_t* block = NULL; - // first, check if it exist in direct access mode - if(dynablocks->direct) { - block = dynablocks->direct[addr-dynablocks->text]; - if(block) { - dynarec_log(LOG_VERBOSE, "Block already exist in Direct Map\n"); - *created = 0; - return block; - } - } - - if (!*created) - return block; + #endif pthread_mutex_lock(&my_context->mutex_dyndump); - if(!dynablocks->direct) { - dynablock_t** p = (dynablock_t**)customCalloc(dynablocks->textsz, sizeof(dynablock_t*)); - if(native_lock_storeifnull(&dynablocks->direct, p)!=p) - customFree(p); // someone already create the 
direct array, too late... - } // create and add new block dynarec_log(LOG_VERBOSE, "Ask for DynaRec Block creation @%p\n", (void*)addr); block = (dynablock_t*)customCalloc(1, sizeof(dynablock_t)); - block->parent = dynablocks; - dynablock_t* tmp = (dynablock_t*)native_lock_storeifnull(&dynablocks->direct[addr-dynablocks->text], block); - if(tmp != block) { - // a block appeard! - pthread_mutex_unlock(&my_context->mutex_dyndump); - customFree(block); - *created = 0; - return tmp; - } - *created = 1; pthread_mutex_lock(&my_context->mutex_dyndump); return block; } @@ -319,39 +158,26 @@ void cancelFillBlock() return NULL if block is not found / cannot be created. Don't create if create==0 */ -static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr, int create, dynablock_t* current, int need_lock) +static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr, int create, int need_lock) { - // try the quickest way first: get parent of current and check if ok! - dynablocklist_t *dynablocks = NULL; - dynablock_t* block = NULL; - if(current && current->done && !current->gone) { - dynablocks = current->parent; - if(dynablocks && !(addr>=dynablocks->text && addr<(dynablocks->text+dynablocks->textsz))) - dynablocks = NULL; - } - // nope, lets do the long way - if(!dynablocks) { - dynablocks = getDBFromAddress(addr); - if(!dynablocks) { - dynablocks = GetDynablocksFromAddress(emu->context, addr); - if(!dynablocks) - return NULL; - } - } - // check direct first, without lock - if(dynablocks->direct/* && (addr>=dynablocks->text) && (addr<(dynablocks->text+dynablocks->textsz))*/) - if((block = dynablocks->direct[addr-dynablocks->text])) - return block; + dynablock_t* block = getDB(addr); + if(block || !create) + return block; - int created = create; - block = AddNewDynablock(dynablocks, addr, &created); - if(!created) - return block; // existing block... 
+ if(need_lock) + pthread_mutex_lock(&my_context->mutex_dyndump); + + block = getDB(addr); // just in case + if(block) { + if(need_lock) + pthread_mutex_unlock(&my_context->mutex_dyndump); + return block; + } + + block = AddNewDynablock(addr); // fill the block block->x64_addr = (void*)addr; - if(need_lock) - pthread_mutex_lock(&my_context->mutex_dyndump); if(sigsetjmp(&dynarec_jmpbuf, 1)) { printf_log(LOG_INFO, "FillBlock at %p triggered a segfault, cancelling\n", (void*)addr); if(need_lock) @@ -359,209 +185,59 @@ static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t return NULL; } void* ret = FillBlock64(block, filladdr); - if(need_lock) - pthread_mutex_unlock(&my_context->mutex_dyndump); if(!ret) { dynarec_log(LOG_DEBUG, "Fillblock of block %p for %p returned an error\n", block, (void*)addr); - void* old = (void*)native_lock_storeifref(&dynablocks->direct[addr-dynablocks->text], 0, block); - if(old!=block && old) {// put it back in place, strange things are happening here! 
- dynarec_log(LOG_INFO, "Warning, a wild block appeared at %p: %p\n", (void*)addr, old); - // doing nothing else, the block has not be writen - } customFree(block); block = NULL; } // check size - if(block && block->x64_size) { - if(dynablocks->minstart>addr) - dynablocks->minstart = addr; + if(block && (block->x64_size || (!block->x64_size && !block->done))) { int blocksz = block->x64_size; - if(dynablocks->maxend<addr+blocksz) { - dynablocks->maxend = addr+blocksz; - for(unsigned idx=(addr>>DYNAMAP_SHIFT)+1; idx<=((addr+blocksz-1)>>DYNAMAP_SHIFT); ++idx) { - dynablocklist_t* dblist; - if((dblist = getDB(idx))) - if(dblist->minstart>addr) - dblist->minstart = addr; - } - } + if(blocksz>my_context->max_db_size) + my_context->max_db_size = blocksz; // fill-in jumptable - addJumpTableIfDefault64(block->x64_addr, block->block); - for(int i=0; i<block->sons_size; ++i) { - addJumpTableIfDefault64(block->sons[i]->x64_addr, block->sons[i]->block); - block->sons[i]->done = 1; + if(!addJumpTableIfDefault64(block->x64_addr, block->block)) { + FreeDynablock(block, 1); + block = getDB(addr); + } else { + if(block->x64_size) + block->done = 1; // don't validate the block if the size is null, but keep the block } - block->done = 1; } + if(need_lock) + pthread_mutex_unlock(&my_context->mutex_dyndump); - dynarec_log(LOG_DEBUG, "%04d| --- DynaRec Block %s @%p:%p (%p, 0x%x bytes, with %d son(s))\n", GetTID(), created?"created":"recycled", (void*)addr, (void*)(addr+((block)?block->x64_size:1)-1), (block)?block->block:0, (block)?block->size:0, (block)?block->sons_size:0); + dynarec_log(LOG_DEBUG, "%04d| --- DynaRec Block created @%p:%p (%p, 0x%x bytes)\n", GetTID(), (void*)addr, (void*)(addr+((block)?block->x64_size:1)-1), (block)?block->block:0, (block)?block->size:0); return block; } -#define MAX_HOTPAGE 64 -#define HOTPAGE_STEP 64 -static int volatile hotpage_count[MAX_HOTPAGE] = {0}; -static uintptr_t volatile hotpage[MAX_HOTPAGE] = {0}; -static uintptr_t volatile 
hotpage_size[MAX_HOTPAGE] = {0}; -static volatile int hotpages = 0; - -int IsInHotPage(uintptr_t addr) { - if(!hotpages) - return 0; - for(int i=0; i<MAX_HOTPAGE; ++i) { - if((hotpage_count[i]>0) && (addr>=hotpage[i]) && (addr<hotpage[i]+0x1000*(hotpage_size[i]+1))) { - --hotpage_count[i]; - if(!hotpage_count[i]) { - --hotpages; - hotpage_size[i] = 0; - dynarec_log(LOG_DEBUG, "End of Hotpage %p\n", (void*)hotpage[i]); - } - __sync_synchronize(); - return 1; - } - } - return 0; -} - -int AreaInHotPage(uintptr_t start, uintptr_t end) { - if(!hotpages) - return 0; - for(int i=0; i<MAX_HOTPAGE; ++i) { - if(hotpage_count[i]>0) - if(IntervalIntersects(start, end, hotpage[i], hotpage[i]+0x1000*(hotpage_size[i]+1)-1)) { - --hotpage_count[i]; - if(!hotpage_count[i]) { - --hotpages; - hotpage_size[i] = 0; - dynarec_log(LOG_DEBUG, "End of Hotpage %p\n", (void*)hotpage[i]); - } - return 1; - } - } - return 0; -} - -void FuseHotPage(int idx) { - uintptr_t start = hotpage[idx]; - uintptr_t end = start+0x1000*(hotpage_size[idx]+1); - for(int i=0; i<MAX_HOTPAGE; ++i) - if(i!=idx && hotpage_count[i]>0) { - if(IntervalIntersects(start, end, hotpage[i], hotpage[i]+0x1000*(hotpage_size[i]+1)-1)) { - if(hotpage_count[i]>hotpage_count[idx]) - hotpage_count[idx] = hotpage_count[i]; - if(hotpage[i]>hotpage[idx]) - hotpage[idx]=hotpage[i]; - if(hotpage[i]+0x1000*(hotpage_size[i]+1)>end) - hotpage_size[idx] = ((hotpage[i]+0x1000*(hotpage_size[i]+1))-hotpage[idx])/0x1000 - 1; - hotpage_count[i] = 0; - return; - } - } -} - -void AddHotPage(uintptr_t addr) { - addr&=~0xfff; - // look for same address - for(int i=0; i<MAX_HOTPAGE; ++i) { - if(addr>=hotpage[i] && addr<hotpage[i]+0x1000*(hotpage_size[i]+1)) { - if(!hotpage_count[i]) - ++hotpages; - hotpage_count[i] = HOTPAGE_STEP; - __sync_synchronize(); - return; - } - if(addr==hotpage[i]+0x1000*(hotpage_size[i]+1)) { - ++hotpage_size[i]; - hotpage_count[i] = HOTPAGE_STEP; - FuseHotPage(i); - __sync_synchronize(); - return; - } - 
if(addr+0x1000==hotpage[i]) { - ++hotpage_size[i]; - hotpage[i] = addr; - hotpage_count[i] = HOTPAGE_STEP; - __sync_synchronize(); - return; - } - if(addr==hotpage[i]+0x1000*(hotpage_size[i]+2)) { - hotpage_size[i]+=2; - hotpage_count[i] = HOTPAGE_STEP; - FuseHotPage(i); - __sync_synchronize(); - return; - } - if(addr+0x2000==hotpage[i]) { - hotpage_size[i]+=2; - hotpage[i] = addr; - hotpage_count[i] = HOTPAGE_STEP; - FuseHotPage(i); - __sync_synchronize(); - return; - } - } - // look for empty spot / minium - int mincnt = hotpage_count[0]*(hotpage_size[0]+1); - int minidx = 0; - for(int i=1; i<MAX_HOTPAGE; ++i) - if((hotpage_count[i]*(hotpage_size[i]+1))<mincnt) { - mincnt = (hotpage_count[i]*(hotpage_size[i]+1)); - minidx = i; - } - if(hotpage_count[minidx]) { - static int cnt = 0; - if(cnt<50) { - dynarec_log(LOG_NONE, "Warning, not enough Hotpage, replacing %p(%p/%d) with %p\n", (void*)hotpage[minidx], (void*)(0x1000*(hotpage_size[minidx]+1)), hotpage_count[minidx], (void*)addr); - ++cnt; - if(cnt==50) // stop spamming console with message... 
- dynarec_log(LOG_NONE, " will stop warning about not enough Hotpage now\n"); - } - hotpage_size[minidx] = 0; - } else - ++hotpages; - hotpage[minidx] = addr; - hotpage_count[minidx] = HOTPAGE_STEP; - __sync_synchronize(); -} - -dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, dynablock_t** current) +dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create) { - dynablock_t *db = internalDBGetBlock(emu, addr, addr, create, *current, 1); - dynablock_t *father = (db && db->father)?db->father:db; - if(father && father->done && db->block && father->need_test) { - if(pthread_mutex_trylock(&my_context->mutex_dyndump)) { - dynarec_log(LOG_DEBUG, "mutex_dyndump not available when trying to validate block %p from %p:%p (hash:%X) with %d son(s) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, db->hash, db->sons_size, (void*)addr); + dynablock_t *db = internalDBGetBlock(emu, addr, addr, create, 1); + if(db && db->done && db->block && db->need_test) { + if(AreaInHotPage((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr + db->x64_size - 1)) { + dynarec_log(LOG_DEBUG, "Not running block %p from %p:%p with for %p because it's in a hotpage\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, (void*)addr); return NULL; } - if(AreaInHotPage((uintptr_t)father->x64_addr, (uintptr_t)father->x64_addr + father->x64_size - 1)) { - dynarec_log(LOG_DEBUG, "Not running block %p from %p:%p with %d son(s) for %p because it's in a hotpage\n", father, father->x64_addr, father->x64_addr+father->x64_size-1, father->sons_size, (void*)addr); - pthread_mutex_unlock(&my_context->mutex_dyndump); + uint32_t hash = X31_hash_code(db->x64_addr, db->x64_size); + if(pthread_mutex_trylock(&my_context->mutex_dyndump)) { + dynarec_log(LOG_DEBUG, "mutex_dyndump not available when trying to validate block %p from %p:%p (hash:%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, db->hash, (void*)addr); return NULL; } - uint32_t hash = X31_hash_code(father->x64_addr, 
father->x64_size); - if(hash!=father->hash) { - father->done = 0; // invalidating the block - dynarec_log(LOG_DEBUG, "Invalidating block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size-1, hash, father->hash, father->sons_size, (void*)addr); - // no more current if it gets invalidated too - if(*current && IntervalIntersects( - (uintptr_t)father->x64_addr, - (uintptr_t)father->x64_addr+father->x64_size-1, - (uintptr_t)(*current)->x64_addr, - (uintptr_t)(*current)->x64_addr+(*current)->x64_size-1)) - *current = NULL; - // Free father, it's now invalid! - FreeDynablock(father, 0); + if(hash!=db->hash) { + db->done = 0; // invalidating the block + dynarec_log(LOG_DEBUG, "Invalidating block %p from %p:%p (hash:%X/%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, hash, db->hash, (void*)addr); + // Free db, it's now invalid! + FreeDynablock(db, 0); // start again... (will create a new block) - db = internalDBGetBlock(emu, addr, addr, create, *current, 0); + db = internalDBGetBlock(emu, addr, addr, create, 0); } else { - father->need_test = 0; - dynarec_log(LOG_DEBUG, "Validating block %p from %p:%p (hash:%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size-1, father->hash, father->sons_size, (void*)addr); - protectDB((uintptr_t)father->x64_addr, father->x64_size); + db->need_test = 0; + dynarec_log(LOG_DEBUG, "Validating block %p from %p:%p (hash:%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, db->hash, (void*)addr); + protectDB((uintptr_t)db->x64_addr, db->x64_size); // fill back jumptable - addJumpTableIfDefault64(father->x64_addr, father->block); - for(int i=0; i<father->sons_size; ++i) - addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block); + setJumpTableIfRef64(db->x64_addr, db->block, db->jmpnext); } pthread_mutex_unlock(&my_context->mutex_dyndump); } @@ -572,26 +248,23 @@ dynablock_t* DBAlternateBlock(x64emu_t* emu, 
uintptr_t addr, uintptr_t filladdr) { dynarec_log(LOG_DEBUG, "Creating AlternateBlock at %p for %p\n", (void*)addr, (void*)filladdr); int create = 1; - dynablock_t *db = internalDBGetBlock(emu, addr, filladdr, create, NULL, 1); - dynablock_t *father = (db && db->father)?db->father:db; - if(father && father->done && db->block && father->need_test) { + dynablock_t *db = internalDBGetBlock(emu, addr, filladdr, create, 1); + if(db && db->done && db->block && db->need_test) { if(pthread_mutex_trylock(&my_context->mutex_dyndump)) return NULL; - uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size); - if(hash!=father->hash) { - father->done = 0; // invalidating the block - dynarec_log(LOG_DEBUG, "Invalidating alt block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size, hash, father->hash, father->sons_size, (void*)addr); - // Free father, it's now invalid! - FreeDynablock(father, 0); + uint32_t hash = X31_hash_code(db->x64_addr, db->x64_size); + if(hash!=db->hash) { + db->done = 0; // invalidating the block + dynarec_log(LOG_DEBUG, "Invalidating alt block %p from %p:%p (hash:%X/%X) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size, hash, db->hash, (void*)addr); + // Free db, it's now invalid! + FreeDynablock(db, 0); // start again... 
(will create a new block) - db = internalDBGetBlock(emu, addr, filladdr, create, NULL, 0); + db = internalDBGetBlock(emu, addr, filladdr, create, 0); } else { - father->need_test = 0; - protectDB((uintptr_t)father->x64_addr, father->x64_size); + db->need_test = 0; + protectDB((uintptr_t)db->x64_addr, db->x64_size); // fill back jumptable - addJumpTableIfDefault64(father->x64_addr, father->block); - for(int i=0; i<father->sons_size; ++i) - addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block); + addJumpTableIfDefault64(db->x64_addr, db->block); } pthread_mutex_unlock(&my_context->mutex_dyndump); } diff --git a/src/dynarec/dynablock_private.h b/src/dynarec/dynablock_private.h index e73d35ba..dbe8c502 100755 --- a/src/dynarec/dynablock_private.h +++ b/src/dynarec/dynablock_private.h @@ -1,16 +1,14 @@ #ifndef __DYNABLOCK_PRIVATE_H_ #define __DYNABLOCK_PRIVATE_H_ -typedef struct dynablocklist_s dynablocklist_t; - typedef struct instsize_s { - unsigned int x64:4; - unsigned int nat:4; + unsigned char x64:4; + unsigned char nat:4; } instsize_t; typedef struct dynablock_s { - dynablocklist_t* parent; - void* block; + void* block; // block-sizeof(void*) == self + void* actual_block; // the actual start of the block (so block-sizeof(void*)) int size; void* x64_addr; uintptr_t x64_size; @@ -20,18 +18,8 @@ typedef struct dynablock_s { uint8_t gone; uint8_t dummy; int isize; - dynablock_t** sons; // sons (kind-of dummy dynablock...) 
- int sons_size; - dynablock_t* father; // set only in the case of a son instsize_t* instsize; + void* jmpnext; // a branch jmpnext code when block is marked } dynablock_t; -typedef struct dynablocklist_s { - uintptr_t text; - int textsz; - uintptr_t maxend; // max address end for anyblock on this blocklist - uintptr_t minstart; // min start address for block overlapping this blocklist - dynablock_t** direct; // direct mapping (waste of space, so the array is created at first write) -} dynablocklist_t; - #endif //__DYNABLOCK_PRIVATE_H_ \ No newline at end of file diff --git a/src/dynarec/dynarec.c b/src/dynarec/dynarec.c index 819ef219..920bfaa8 100755 --- a/src/dynarec/dynarec.c +++ b/src/dynarec/dynarec.c @@ -20,22 +20,7 @@ #include "dynablock.h" #include "dynablock_private.h" #include "bridge.h" -#endif - -#ifdef DYNAREC -#ifdef ARM64 -void arm64_prolog(x64emu_t* emu, void* addr) EXPORTDYN; -void arm64_epilog() EXPORTDYN; -#define native_prolog arm64_prolog -#define native_epilog arm64_epilog -#elif defined(LA464) -void la464_prolog(x64emu_t* emu, void* addr) EXPORTDYN; -void la464_epilog() EXPORTDYN; -#define native_prolog la464_prolog -#define native_epilog la464_epilog -#else -#error Unsupported architecture -#endif +#include "dynarec_next.h" #endif #ifdef DYNAREC @@ -49,9 +34,8 @@ void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2, uintptr_t* x3) printf_log(LOG_NONE, "Warning, jumping to NULL address from %p (db=%p, x64addr=%p/%s)\n", x2-4, db, db?(void*)getX64Address(db, (uintptr_t)x2-4):NULL, db?getAddrFunctionName(getX64Address(db, (uintptr_t)x2-4)):"(nil)"); } #endif - dynablock_t* current = NULL; void * jblock; - dynablock_t* block = DBGetBlock(emu, addr, 1, ¤t); + dynablock_t* block = DBGetBlock(emu, addr, 1); if(!block) { // no block, let link table as is... if(hasAlternate((void*)addr)) { @@ -60,7 +44,7 @@ void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2, uintptr_t* x3) R_RIP = addr; // but also new RIP! 
*x3 = addr; // and the RIP in x27 register printf_log(LOG_DEBUG, " -> %p\n", (void*)addr); - block = DBGetBlock(emu, addr, 1, ¤t); + block = DBGetBlock(emu, addr, 1); } if(!block) { #ifdef HAVE_TRACE @@ -121,23 +105,18 @@ void DynaCall(x64emu_t* emu, uintptr_t addr) PushExit(emu); R_RIP = addr; emu->df = d_none; - dynablock_t* current = NULL; - dynablock_t* block = DBGetBlock(emu, R_RIP, 1, ¤t); - current = block; while(!emu->quit) { + dynablock_t* block = DBGetBlock(emu, R_RIP, 1); if(!block || !block->block || !block->done) { // no block, of block doesn't have DynaRec content (yet, temp is not null) // Use interpreter (should use single instruction step...) dynarec_log(LOG_DEBUG, "%04d|Calling Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu); Run(emu, 1); - block = DBGetBlock(emu, R_RIP, 1, ¤t); - current = block; } else { - dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x64 instructions (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,block->father, emu); + dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x64 instructions emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,emu); CHECK_FLAGS(emu); // block is here, let's run it! native_prolog(emu, block->block); - block = NULL; } if(emu->fork) { int forktype = emu->fork; @@ -201,22 +180,17 @@ int DynaRun(x64emu_t* emu) return Run(emu, 0); #ifdef DYNAREC else { - dynablock_t* current = NULL; - dynablock_t* block = DBGetBlock(emu, R_RIP, 1, ¤t);; - current = block; while(!emu->quit) { + dynablock_t* block = DBGetBlock(emu, R_RIP, 1); if(!block || !block->block || !block->done) { // no block, of block doesn't have DynaRec content (yet, temp is not null) // Use interpreter (should use single instruction step...) 
dynarec_log(LOG_DEBUG, "%04d|Running Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu); Run(emu, 1); - block = DBGetBlock(emu, R_RIP, 1, ¤t); - current = block; } else { - dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x64 insts (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, block->father, emu); + dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x64 insts emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, emu); // block is here, let's run it! native_prolog(emu, block->block); - block = NULL; } if(emu->fork) { int forktype = emu->fork; diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index 9da18cf6..890b2397 100755 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -23,6 +23,7 @@ #include "dynarec_native.h" #include "dynarec_arch.h" +#include "dynarec_next.h" void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name) { uint8_t *ip = (uint8_t*)inst->addr; @@ -279,7 +280,7 @@ int Table64(dynarec_native_t *dyn, uint64_t val) // not found, add it if(idx==-1) { if(dyn->table64size == dyn->table64cap) { - dyn->table64cap+=4; + dyn->table64cap+=16; dyn->table64 = (uint64_t*)customRealloc(dyn->table64, dyn->table64cap * sizeof(uint64_t)); } idx = dyn->table64size++; @@ -385,9 +386,12 @@ void CancelBlock64() return; customFree(helper->next); customFree(helper->insts); + customFree(helper->instsize); customFree(helper->table64); - if(helper->dynablock && helper->dynablock->block) - FreeDynarecMap(helper->dynablock, (uintptr_t)helper->dynablock->block, helper->dynablock->size); + if(helper->dynablock && helper->dynablock->actual_block) + FreeDynarecMap(helper->dynablock, (uintptr_t)helper->dynablock->actual_block, helper->dynablock->size); + else if(helper->dynablock && helper->block) + FreeDynarecMap(helper->dynablock, (uintptr_t)helper->block-sizeof(void*), helper->dynablock->size); } uintptr_t 
native_pass0(dynarec_native_t* dyn, uintptr_t addr); @@ -395,15 +399,38 @@ uintptr_t native_pass1(dynarec_native_t* dyn, uintptr_t addr); uintptr_t native_pass2(dynarec_native_t* dyn, uintptr_t addr); uintptr_t native_pass3(dynarec_native_t* dyn, uintptr_t addr); +void* CreateEmptyBlock(dynablock_t* block, uintptr_t addr) { + block->isize = 0; + block->done = 0; + size_t sz = 4*sizeof(void*); + void* actual_p = (void*)AllocDynarecMap(block, sz); + void* p = actual_p + sizeof(void*); + if(actual_p==NULL) { + dynarec_log(LOG_INFO, "AllocDynarecMap(%p, %zu) failed, cancelling block\n", block, sz); + CancelBlock64(); + return NULL; + } + block->size = sz; + block->actual_block = actual_p; + block->block = p; + block->jmpnext = p; + *(dynablock_t**)actual_p = block; + *(void**)(p+2*sizeof(void*)) = native_epilog; + CreateJmpNext(block->jmpnext, p+2*sizeof(void*)); + block->need_test = 0; + // all done... + __clear_cache(actual_p, actual_p+sz); // need to clear the cache before execution... + return block; +} + void* FillBlock64(dynablock_t* block, uintptr_t addr) { if(IsInHotPage(addr)) { dynarec_log(LOG_DEBUG, "Cancelling dynarec FillBlock on hotpage for %p\n", (void*)addr); return NULL; } if(addr>=box64_nodynarec_start && addr<box64_nodynarec_end) { - dynarec_log(LOG_INFO, "Stopping block in no-dynarec zone\n"); - block->done = 1; - return (void*)block; + dynarec_log(LOG_INFO, "Create empty block in no-dynarec zone\n"); + return CreateEmptyBlock(block, addr); } // protect the 1st page protectDB(addr, 1); @@ -425,7 +452,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) { if(!helper.size) { dynarec_log(LOG_INFO, "Warning, null-sized dynarec block (%p)\n", (void*)addr); CancelBlock64(); - return (void*)block; + return CreateEmptyBlock(block, addr);; } if(!isprotectedDB(addr, 1)) { dynarec_log(LOG_INFO, "Warning, write on current page on pass0, aborting dynablock creation (%p)\n", (void*)addr); @@ -458,44 +485,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t 
addr) { } // fill predecessors with the jump address fillPredecessors(&helper); - // check for the optionnal barriers now - /*for(int i=helper.size-1; i>=0; --i) { - if(helper.insts[i].barrier_maybe) { - // out-of-block jump - if(helper.insts[i].x64.jmp_insts == -1) { - // nothing for now - } else { - // inside block jump - int k = helper.insts[i].x64.jmp_insts; - if(k>i) { - // jump in the future - if(helper.insts[k].pred_sz>1) { - // with multiple flow, put a barrier - helper.insts[k].x64.barrier|=BARRIER_FLAGS; - } - } else { - // jump back - helper.insts[k].x64.barrier|=BARRIER_FLAGS; - } - } - } - }*/ - // check to remove useless barrier, in case of jump when destination doesn't needs flags - /*for(int i=helper.size-1; i>=0; --i) { - int k; - if(helper.insts[i].x64.jmp - && ((k=helper.insts[i].x64.jmp_insts)>=0) - && helper.insts[k].x64.barrier&BARRIER_FLAGS) { - //TODO: optimize FPU barrier too - if((!helper.insts[k].x64.need_flags) - ||(helper.insts[k].x64.set_flags==X_ALL - && helper.insts[k].x64.state_flags==SF_SET) - ||(helper.insts[k].x64.state_flags==SF_SET_PENDING)) { - //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "Removed barrier for inst %d\n", k); - helper.insts[k].x64.barrier &= ~BARRIER_FLAGS; // remove flag barrier - } - } - }*/ + int pos = helper.size; while (pos>=0) pos = updateNeed(&helper, pos, 0); @@ -505,10 +495,28 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) { // pass 2, instruction size native_pass2(&helper, addr); + // keep size of instructions for signal handling + size_t insts_rsize = 0; + { + size_t insts_size = 0; + size_t cap = 1; + for(int i=0; i<helper.size; ++i) + cap += 1 + ((helper.insts[i].x64.size>helper.insts[i].size)?helper.insts[i].x64.size:helper.insts[i].size)/15; + helper.instsize = (instsize_t*)customCalloc(cap, sizeof(instsize_t)); + for(int i=0; i<helper.size; ++i) + helper.instsize = addInst(helper.instsize, &insts_size, &cap, helper.insts[i].x64.size, helper.insts[i].size/4); + helper.instsize = 
addInst(helper.instsize, &insts_size, &cap, 0, 0); // add a "end of block" mark, just in case + insts_rsize = insts_size*sizeof(instsize_t); + } + insts_rsize = (insts_rsize+7)&~7; // round the size... // ok, now allocate mapped memory, with executable flag on - size_t sz = helper.native_size + helper.table64size*sizeof(uint64_t); - void* p = (void*)AllocDynarecMap(block, sz); - if(p==NULL) { + size_t sz = sizeof(void*) + helper.native_size + helper.table64size*sizeof(uint64_t) + 4*sizeof(void*) + insts_rsize; + // dynablock_t* block (arm insts) table64 jmpnext code instsize + void* actual_p = (void*)AllocDynarecMap(block, sz); + void* p = actual_p + sizeof(void*); + void* next = p + helper.native_size + helper.table64size*sizeof(uint64_t); + void* instsize = next + 4*sizeof(void*); + if(actual_p==NULL) { dynarec_log(LOG_INFO, "AllocDynarecMap(%p, %zu) failed, cancelling block\n", block, sz); CancelBlock64(); return NULL; @@ -516,11 +524,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) { helper.block = p; helper.native_start = (uintptr_t)p; helper.tablestart = helper.native_start + helper.native_size; - if(helper.sons_size) { - helper.sons_x64 = (uintptr_t*)alloca(helper.sons_size*sizeof(uintptr_t)); - helper.sons_native = (void**)alloca(helper.sons_size*sizeof(void*)); - } - int pass2_sons_size = helper.sons_size; + *(dynablock_t**)actual_p = block; // pass 3, emit (log emit native opcode) if(box64_dynarec_dump) { dynarec_log(LOG_NONE, "%s%04d|Emitting %zu bytes for %u x64 bytes", (box64_dynarec_dump>1)?"\e[01;36m":"", GetTID(), helper.native_size, helper.isize); @@ -550,30 +554,29 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) { if(helper.table64size) { memcpy((void*)helper.tablestart, helper.table64, helper.table64size*8); } - // all done... - __clear_cache(p, p+sz); // need to clear the cache before execution... 
// keep size of instructions for signal handling - { - size_t cap = 1; - for(int i=0; i<helper.size; ++i) - cap += 1 + ((helper.insts[i].x64.size>helper.insts[i].size)?helper.insts[i].x64.size:helper.insts[i].size)/15; - size_t size = 0; - block->instsize = (instsize_t*)customCalloc(cap, sizeof(instsize_t)); - for(int i=0; i<helper.size; ++i) - block->instsize = addInst(block->instsize, &size, &cap, helper.insts[i].x64.size, helper.insts[i].size/4); - block->instsize = addInst(block->instsize, &size, &cap, 0, 0); // add a "end of block" mark, just in case - } + memcpy(instsize, helper.instsize, insts_rsize); + block->instsize = instsize; // ok, free the helper now customFree(helper.insts); helper.insts = NULL; customFree(helper.table64); helper.table64 = NULL; + customFree(helper.instsize); + helper.instsize = NULL; block->size = sz; block->isize = helper.size; + block->actual_block = actual_p; block->block = p; + block->jmpnext = next+sizeof(void*); + *(dynablock_t**)next = block; + *(void**)(next+2*sizeof(void*)) = native_next; + CreateJmpNext(block->jmpnext, next+2*sizeof(void*)); block->need_test = 0; //block->x64_addr = (void*)start; block->x64_size = end-start; + // all done... + __clear_cache(actual_p, actual_p+sz); // need to clear the cache before execution... 
block->hash = X31_hash_code(block->x64_addr, block->x64_size); // Check if something changed, to abbort if it as if((block->hash != hash)) { @@ -587,36 +590,6 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr) { return NULL; //protectDB(addr, end-addr); } - // fill sons if any - dynablock_t** sons = NULL; - int sons_size = 0; - if(pass2_sons_size != helper.sons_size) - dynarec_log(LOG_NONE, "Warning, sons size difference bitween pass2:%d and pass3:%d\n", pass2_sons_size, helper.sons_size); - if(helper.sons_size) { - sons = (dynablock_t**)customCalloc(helper.sons_size, sizeof(dynablock_t*)); - for (int i=0; i<helper.sons_size; ++i) { - int created = 1; - dynablock_t *son = AddNewDynablock(block->parent, helper.sons_x64[i], &created); - if(created) { // avoid breaking a working block! - son->block = helper.sons_native[i]; - son->x64_addr = (void*)helper.sons_x64[i]; - son->x64_size = end-helper.sons_x64[i]; - if(!son->x64_size) {printf_log(LOG_NONE, "Warning, son with null x64 size! 
(@%p / Native=%p)", son->x64_addr, son->block);} - son->father = block; - son->size = sz + son->block - block->block; // update size count, for debugging - //son->done = 1; - if(!son->parent) - son->parent = block->parent; - sons[sons_size] = son; - ++sons_size; - } - } - if(sons_size) { - block->sons = sons; - block->sons_size = sons_size; - } else - customFree(sons); - } current_helper = NULL; //block->done = 1; return (void*)block; diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index b3357f8d..7ab25a6f 100755 --- a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -37,7 +37,6 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr) rex_t rex; int rep; // 0 none, 1=F2 prefix, 2=F3 prefix int need_epilog = 1; - dyn->sons_size = 0; // Clean up (because there are multiple passes) dyn->f.pending = 0; dyn->f.dfnone = 0; @@ -248,6 +247,6 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr) jump_to_epilog(dyn, ip, 0, ninst); // no linker here, it's an unknow instruction } FINI; - MESSAGE(LOG_DUMP, "---- END OF BLOCK ---- (%d, %d sons)\n", dyn->size, dyn->sons_size); + MESSAGE(LOG_DUMP, "---- END OF BLOCK ---- (%d)\n", dyn->size); return addr; } diff --git a/src/dynarec/dynarec_next.h b/src/dynarec/dynarec_next.h new file mode 100644 index 00000000..19d426b8 --- /dev/null +++ b/src/dynarec/dynarec_next.h @@ -0,0 +1,22 @@ +#ifndef __DYNAREC_NEXT_H__ +#define __DYNAREC_NEXT_H__ + +#ifdef ARM64 +void arm64_next(void) EXPORTDYN; +void arm64_prolog(x64emu_t* emu, void* addr) EXPORTDYN; +void arm64_epilog() EXPORTDYN; +#define native_next arm64_next +#define native_prolog arm64_prolog +#define native_epilog arm64_epilog +#elif defined(LA464) +void la464_next(void) EXPORTDYN; +void la464_prolog(x64emu_t* emu, void* addr) EXPORTDYN; +void la464_epilog() EXPORTDYN; +#define native_next la464_next +#define native_prolog la464_prolog +#define native_epilog la464_epilog +#else +#error Unsupported 
architecture +#endif + +#endif //__DYNAREC_NEXT_H__ \ No newline at end of file diff --git a/src/elfs/elfloader.c b/src/elfs/elfloader.c index 35a7d3a2..227ef075 100755 --- a/src/elfs/elfloader.c +++ b/src/elfs/elfloader.c @@ -1405,7 +1405,7 @@ void* GetDynamicSection(elfheader_t* h) } #ifdef DYNAREC -dynablocklist_t* GetDynablocksFromAddress(box64context_t *context, uintptr_t addr) +dynablock_t* GetDynablocksFromAddress(box64context_t *context, uintptr_t addr) { (void)context; // if we are here, the there is not block in standard "space" @@ -1415,7 +1415,7 @@ dynablocklist_t* GetDynablocksFromAddress(box64context_t *context, uintptr_t add }*/ if(box64_dynarec_forced) { addDBFromAddressRange(addr, 1); - return getDB(addr>>DYNAMAP_SHIFT); + return getDB(addr); } //check if address is in an elf... if yes, grant a block (should I warn) Dl_info info; diff --git a/src/elfs/elfloader_private.h b/src/elfs/elfloader_private.h index e75ba1ca..eb5e81af 100755 --- a/src/elfs/elfloader_private.h +++ b/src/elfs/elfloader_private.h @@ -1,10 +1,6 @@ #ifndef __ELFLOADER_PRIVATE_H_ #define __ELFLOADER_PRIVATE_H_ -#ifdef DYNAREC -typedef struct dynablocklist_s dynablocklist_t; -#endif - typedef struct library_s library_t; typedef struct needed_libs_s needed_libs_t; typedef struct kh_mapsymbols_s kh_mapsymbols_t; diff --git a/src/include/box64context.h b/src/include/box64context.h index 9eb130ec..1d7d8b58 100755 --- a/src/include/box64context.h +++ b/src/include/box64context.h @@ -26,7 +26,6 @@ typedef struct atfork_fnc_s { } atfork_fnc_t; #ifdef DYNAREC typedef struct dynablock_s dynablock_t; -typedef struct dynablocklist_s dynablocklist_t; typedef struct mmaplist_s mmaplist_t; typedef struct kh_dynablocks_s kh_dynablocks_t; #endif @@ -125,6 +124,7 @@ typedef struct box64context_s { pthread_mutex_t mutex_lock; // dynarec build will use their own mecanism #else pthread_mutex_t mutex_dyndump; + uintptr_t max_db_size; // the biggest (in x86_64 instructions bytes) built dynablock int 
trace_dynarec; #endif pthread_mutex_t mutex_tls; diff --git a/src/include/custommem.h b/src/include/custommem.h index c8a28fc5..0a8642f4 100644 --- a/src/include/custommem.h +++ b/src/include/custommem.h @@ -18,7 +18,6 @@ void customFree(void* p); #ifdef DYNAREC typedef struct dynablock_s dynablock_t; -typedef struct dynablocklist_s dynablocklist_t; // custom protection flag to mark Page that are Write protected for Dynarec purpose uintptr_t AllocDynarecMap(dynablock_t* db, size_t size); void FreeDynarecMap(dynablock_t* db, uintptr_t addr, size_t size); @@ -26,12 +25,15 @@ void FreeDynarecMap(dynablock_t* db, uintptr_t addr, size_t size); void addDBFromAddressRange(uintptr_t addr, size_t size); void cleanDBFromAddressRange(uintptr_t addr, size_t size, int destroy); -dynablocklist_t* getDB(uintptr_t idx); -void addJumpTableIfDefault64(void* addr, void* jmp); +dynablock_t* getDB(uintptr_t idx); +int addJumpTableIfDefault64(void* addr, void* jmp); // return 1 if write was succesfull +int setJumpTableIfRef64(void* addr, void* jmp, void* ref); // return 1 if write was succesfull void setJumpTableDefault64(void* addr); +void setJumpTableDefaultRef64(void* addr, void* jmp); int isJumpTableDefault64(void* addr); uintptr_t getJumpTable64(); uintptr_t getJumpTableAddress64(uintptr_t addr); +uintptr_t getJumpAddress64(uintptr_t addr); #endif #define PROT_DYNAREC 0x80 @@ -47,6 +49,9 @@ void loadProtectionFromMap(); void protectDB(uintptr_t addr, size_t size); void unprotectDB(uintptr_t addr, size_t size, int mark); // if mark==0, the blocks are not marked as potentially dirty int isprotectedDB(uintptr_t addr, size_t size); +int IsInHotPage(uintptr_t addr); +int AreaInHotPage(uintptr_t start, uintptr_t end); +void AddHotPage(uintptr_t addr); #endif void* find32bitBlock(size_t size); void* findBlockNearHint(void* hint, size_t size); diff --git a/src/include/dynablock.h b/src/include/dynablock.h index a2119643..7f28fbd7 100755 --- a/src/include/dynablock.h +++ 
b/src/include/dynablock.h @@ -3,35 +3,22 @@ typedef struct x64emu_s x64emu_t; typedef struct dynablock_s dynablock_t; -typedef struct dynablocklist_s dynablocklist_t; typedef struct kh_dynablocks_s kh_dynablocks_t; uint32_t X31_hash_code(void* addr, int len); -dynablocklist_t* NewDynablockList(uintptr_t text, int textsz, int direct); -void FreeDynablockList(dynablocklist_t** dynablocks); void FreeDynablock(dynablock_t* db, int need_lock); void MarkDynablock(dynablock_t* db); -//return 1 if Dynareblock is empty -int FreeRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size); -void MarkRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size); +void MarkRangeDynablock(dynablock_t* db, uintptr_t addr, uintptr_t size); +int FreeRangeDynablock(dynablock_t* db, uintptr_t addr, uintptr_t size); dynablock_t* FindDynablockFromNativeAddress(void* addr); // defined in box64context.h dynablock_t* FindDynablockDynablocklist(void* addr, kh_dynablocks_t* dynablocks); -uintptr_t StartDynablockList(dynablocklist_t* db); -uintptr_t EndDynablockList(dynablocklist_t* db); -void MarkDirectDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size); - // Handling of Dynarec block (i.e. an exectable chunk of x64 translated code) -dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, dynablock_t** current); // return NULL if block is not found / cannot be created. Don't create if create==0 +dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create); // return NULL if block is not found / cannot be created. 
Don't create if create==0 dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr); -// Create and Add an new dynablock in the list, handling direct/map -dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* created); - // for use in signal handler void cancelFillBlock(); -int IsInHotPage(uintptr_t addr); -void AddHotPage(uintptr_t addr); #endif //__DYNABLOCK_H_ \ No newline at end of file diff --git a/src/include/elfloader.h b/src/include/elfloader.h index a4f59719..579c98bd 100755 --- a/src/include/elfloader.h +++ b/src/include/elfloader.h @@ -10,7 +10,7 @@ typedef struct box64context_s box64context_t; typedef struct x64emu_s x64emu_t; typedef struct needed_libs_s needed_libs_t; #ifdef DYNAREC -typedef struct dynablocklist_s dynablocklist_t; +typedef struct dynablock_s dynablock_t; #endif elfheader_t* LoadAndCheckElfHeader(FILE* f, const char* name, int exec); // exec : 0 = lib, 1 = exec @@ -50,8 +50,7 @@ uint32_t GetTLSSize(elfheader_t* h); void* GetTLSPointer(box64context_t* context, elfheader_t* h); void* GetDTatOffset(box64context_t* context, unsigned long int index, unsigned long int offset); #ifdef DYNAREC -dynablocklist_t* GetDynablocksFromAddress(box64context_t *context, uintptr_t addr); -dynablocklist_t* GetDynablocksFromElf(elfheader_t* h); +dynablock_t* GetDynablocksFromAddress(box64context_t *context, uintptr_t addr); #endif void ResetSpecialCaseMainElf(elfheader_t* h); void CreateMemorymapFile(box64context_t* context, int fd); diff --git a/src/libtools/threads.c b/src/libtools/threads.c index 01f3a7c4..88c56ed9 100755 --- a/src/libtools/threads.c +++ b/src/libtools/threads.c @@ -505,8 +505,7 @@ EXPORT int my_pthread_create(x64emu_t *emu, void* t, void* attr, void* start_rou #ifdef DYNAREC if(box64_dynarec) { // pre-creation of the JIT code for the entry point of the thread - dynablock_t *current = NULL; - DBGetBlock(emu, (uintptr_t)start_routine, 1, ¤t); + DBGetBlock(emu, (uintptr_t)start_routine, 1); 
} #endif // create thread @@ -527,8 +526,7 @@ void* my_prepare_thread(x64emu_t *emu, void* f, void* arg, int ssize, void** pet et->arg = arg; #ifdef DYNAREC // pre-creation of the JIT code for the entry point of the thread - dynablock_t *current = NULL; - DBGetBlock(emu, (uintptr_t)f, 1, ¤t); + DBGetBlock(emu, (uintptr_t)f, 1); #endif *pet = et; return pthread_routine; |