| field | value | date |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2021-03-14 17:58:04 +0100 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-03-14 17:58:04 +0100 |
| commit | 4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6 (patch) | |
| tree | e89e9892fa166aa348b8c9f902de7428e875c7bc /src | |
| parent | 3dda84e58b148f92b2bb4d94caacc84011fa3919 (diff) | |
| download | box64-4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6.tar.gz, box64-4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6.zip | |
[DYNAREC] Added Basic blocks for dynarec
Diffstat (limited to 'src')
45 files changed, 2576 insertions, 614 deletions
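The central change in src/custommem.c is that the flat 32-bit lookup structures (`dynmap[]`, `box64_jumptable[]` with `box64_jmptbl_default[]`) are replaced by multi-level tables indexed with 16-bit slices of the 64-bit guest address: `dynmap123` becomes a 3-level array of `dynablocklist_t*`, and the dynarec jump table becomes the 4-level `box64_jmptbl3`, where unpopulated levels point at shared default tables whose entries all resolve to `arm64_next`. The previously commented-out dynarec memory-map management is also re-enabled, and the ARM helpers are renamed to their AArch64 counterparts (`arm64_prolog`, `arm64_epilog`, `arm64_next`, `arm64_lock_*`). The sketch below is illustrative only and not part of the patch: it assumes `JMPTABL_SHIFT == 16` (matching the `0xffff` masks in the diff), and the helpers `init_defaults`/`jumptable_lookup` are made-up names for the example.

```c
// Minimal, self-contained sketch of the new 4-level jump table lookup.
// In the patch these arrays are static inside src/custommem.c and the
// defaults are installed in init_custommem_helper(); arm64_next is the
// assembly stub that re-enters the dynarec linker.
#include <stdint.h>

#define JMPTABL_SHIFT 16
#define JMPTABL_SIZE  (1 << JMPTABL_SHIFT)

static void arm64_next(void) { /* stub: in box64 this calls back into LinkNext */ }

static uintptr_t     box64_jmptbldefault0[JMPTABL_SIZE];
static uintptr_t*    box64_jmptbldefault1[JMPTABL_SIZE];
static uintptr_t**   box64_jmptbldefault2[JMPTABL_SIZE];
static uintptr_t***  box64_jmptbl3[JMPTABL_SIZE];

// Every level initially points at a shared default table, so the whole
// 64-bit address space is covered with only four 64K-entry arrays.
static void init_defaults(void)
{
    for (int i = 0; i < JMPTABL_SIZE; ++i) {
        box64_jmptbldefault0[i] = (uintptr_t)arm64_next;
        box64_jmptbldefault1[i] = box64_jmptbldefault0;
        box64_jmptbldefault2[i] = box64_jmptbldefault1;
        box64_jmptbl3[i]        = box64_jmptbldefault2;
    }
}

// Resolve a 64-bit guest address: each level consumes 16 bits. An address
// that never had a dynablock registered still walks the default tables and
// lands on arm64_next, which falls back to the linker/interpreter.
static uintptr_t jumptable_lookup(uintptr_t addr)
{
    uintptr_t idx3 = (addr >> 48) & 0xffff;
    uintptr_t idx2 = (addr >> 32) & 0xffff;
    uintptr_t idx1 = (addr >> 16) & 0xffff;
    uintptr_t idx0 =  addr        & 0xffff;
    return box64_jmptbl3[idx3][idx2][idx1][idx0];
}
```

In the patch itself, `addJumpTableIfDefault64()` materializes the intermediate levels on demand with `malloc` only when a real jump target is installed, so memory is spent only on 16-bit slices of the address space that actually contain translated code; `setJumpTableDefault64()` reverses a single entry back to `arm64_next` when a block is invalidated.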
diff --git a/src/custommem.c b/src/custommem.c index 8febe5a0..62f15664 100644 --- a/src/custommem.c +++ b/src/custommem.c @@ -23,19 +23,21 @@ #include "khash.h" #ifdef DYNAREC #include "dynablock.h" -#include "dynarec/arm_lock_helper.h" +#include "dynarec/arm64_lock_helper.h" -#define USE_MMAP +//#define USE_MMAP // init inside dynablocks.c KHASH_MAP_INIT_INT64(dynablocks, dynablock_t*) -static dynablocklist_t* dynmap[DYNAMAP_SIZE]; // 4G of memory mapped by 4K block +static dynablocklist_t*** dynmap123[1<<DYNAMAP_SHIFT]; // 64bits.. in 4x16bits array static pthread_mutex_t mutex_mmap; static mmaplist_t *mmaplist; static int mmapsize; static kh_dynablocks_t *dblist_oversized; // store the list of oversized dynablocks (normal sized are inside mmaplist) -static uintptr_t *box64_jumptable[JMPTABL_SIZE]; -static uintptr_t box64_jmptbl_default[1<<JMPTABL_SHIFT]; +static uintptr_t*** box64_jmptbl3[1<<JMPTABL_SHIFT]; +static uintptr_t** box64_jmptbldefault2[1<<JMPTABL_SHIFT]; +static uintptr_t* box64_jmptbldefault1[1<<JMPTABL_SHIFT]; +static uintptr_t box64_jmptbldefault0[1<<JMPTABL_SHIFT]; #endif #define MEMPROT_SHIFT 12 #define MEMPROT_SIZE (1<<(32-MEMPROT_SHIFT)) @@ -284,349 +286,433 @@ void customFree(void* p) } #ifdef DYNAREC -//typedef struct mmaplist_s { -// void* block; -// int maxfree; -// size_t size; -// kh_dynablocks_t* dblist; -// uint8_t* helper; -//} mmaplist_t; - -//uintptr_t FindFreeDynarecMap(dynablock_t* db, int size) -//{ -// // look for free space -// void* sub = NULL; -// for(int i=0; i<mmapsize; ++i) { -// if(mmaplist[i].maxfree>=size) { -// int rsize = 0; -// sub = getFirstBlock(mmaplist[i].block, size, &rsize); -// if(sub) { -// uintptr_t ret = (uintptr_t)allocBlock(mmaplist[i].block, sub, size); -// if(rsize==mmaplist[i].maxfree) -// mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size); -// kh_dynablocks_t *blocks = mmaplist[i].dblist; -// if(!blocks) { -// blocks = mmaplist[i].dblist = kh_init(dynablocks); -// kh_resize(dynablocks, blocks, 64); -// } -// khint_t k; -// int r; -// k = kh_put(dynablocks, blocks, (uintptr_t)ret, &r); -// kh_value(blocks, k) = db; -// for(int j=0; j<size; ++j) -// mmaplist[i].helper[(uintptr_t)ret-(uintptr_t)mmaplist[i].block+j] = (j<256)?j:255; -// return ret; -// } -// } -// } -// return 0; -//} - -//uintptr_t AddNewDynarecMap(dynablock_t* db, int size) -//{ -// int i = mmapsize++; // yeah, useful post incrementation -// dynarec_log(LOG_DEBUG, "Ask for DynaRec Block Alloc #%d\n", mmapsize); -// mmaplist = (mmaplist_t*)realloc(mmaplist, mmapsize*sizeof(mmaplist_t)); -// #ifndef USE_MMAP -// void *p = NULL; -// if(posix_memalign(&p, box64_pagesize, MMAPSIZE)) { -// dynarec_log(LOG_INFO, "Cannot create memory map of %d byte for dynarec block #%d\n", MMAPSIZE, i); -// --mmapsize; -// return 0; -// } -// mprotect(p, MMAPSIZE, PROT_READ | PROT_WRITE | PROT_EXEC); -// #else -// void* p = mmap(NULL, MMAPSIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); -// if(p==(void*)-1) { -// dynarec_log(LOG_INFO, "Cannot create memory map of %d byte for dynarec block #%d\n", MMAPSIZE, i); -// --mmapsize; -// return 0; -// } -// #endif -// setProtection((uintptr_t)p, MMAPSIZE, PROT_READ | PROT_WRITE | PROT_EXEC); -// -// mmaplist[i].block = p; -// mmaplist[i].size = MMAPSIZE; -// mmaplist[i].helper = (uint8_t*)calloc(1, MMAPSIZE); -// // setup marks -// blockmark_t* m = (blockmark_t*)p; -// m->prev.x32 = 0; -// m->next.fill = 0; -// m->next.size = MMAPSIZE-sizeof(blockmark_t); -// m = 
(blockmark_t*)(p+MMAPSIZE-sizeof(blockmark_t)); -// m->next.x32 = 0; -// m->prev.fill = 0; -// m->prev.size = MMAPSIZE-sizeof(blockmark_t); -// // alloc 1st block -// uintptr_t sub = (uintptr_t)allocBlock(mmaplist[i].block, p, size); -// mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size); -// kh_dynablocks_t *blocks = mmaplist[i].dblist = kh_init(dynablocks); -// kh_resize(dynablocks, blocks, 64); -// khint_t k; -// int ret; -// k = kh_put(dynablocks, blocks, (uintptr_t)sub, &ret); -// kh_value(blocks, k) = db; -// for(int j=0; j<size; ++j) -// mmaplist[i].helper[(uintptr_t)sub-(uintptr_t)mmaplist[i].block + j] = (j<256)?j:255; -// return sub; -//} - -//void ActuallyFreeDynarecMap(dynablock_t* db, uintptr_t addr, int size) -//{ -// if(!addr || !size) -// return; -// for(int i=0; i<mmapsize; ++i) { -// if ((addr>(uintptr_t)mmaplist[i].block) -// && (addr<((uintptr_t)mmaplist[i].block+mmaplist[i].size))) { -// void* sub = (void*)(addr-sizeof(blockmark_t)); -// freeBlock(mmaplist[i].block, sub); -// mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size); -// kh_dynablocks_t *blocks = mmaplist[i].dblist; -// if(blocks) { -// khint_t k = kh_get(dynablocks, blocks, (uintptr_t)sub); -// if(k!=kh_end(blocks)) -// kh_del(dynablocks, blocks, k); -// for(int j=0; j<size; ++j) -// mmaplist[i].helper[(uintptr_t)sub-(uintptr_t)mmaplist[i].block+j] = 0; -// } -// return; -// } -// } -// if(mmapsize) -// dynarec_log(LOG_NONE, "Warning, block %p (size %d) not found in mmaplist for Free\n", (void*)addr, size); -//} - -//dynablock_t* FindDynablockFromNativeAddress(void* addr) -//{ -// // look in actual list -// for(int i=0; i<mmapsize; ++i) { -// if ((uintptr_t)addr>=(uintptr_t)mmaplist[i].block -// && ((uintptr_t)addr<(uintptr_t)mmaplist[i].block+mmaplist[i].size)) { -// if(!mmaplist[i].helper) -// return FindDynablockDynablocklist(addr, mmaplist[i].dblist); -// else { -// uintptr_t p = (uintptr_t)addr - (uintptr_t)mmaplist[i].block; -// while(mmaplist[i].helper[p]) p -= mmaplist[i].helper[p]; -// khint_t k = kh_get(dynablocks, mmaplist[i].dblist, (uintptr_t)mmaplist[i].block + p); -// if(k!=kh_end(mmaplist[i].dblist)) -// return kh_value(mmaplist[i].dblist, k); -// return NULL; -// } -// } -// } -// // look in oversized -// return FindDynablockDynablocklist(addr, dblist_oversized); -//} - -//uintptr_t AllocDynarecMap(dynablock_t* db, int size) -//{ -// if(!size) -// return 0; -// if(size>MMAPSIZE-2*sizeof(blockmark_t)) { -// #ifndef USE_MMAP -// void *p = NULL; -// if(posix_memalign(&p, box64_pagesize, size)) { -// dynarec_log(LOG_INFO, "Cannot create dynamic map of %d bytes\n", size); -// return 0; -// } -// mprotect(p, size, PROT_READ | PROT_WRITE | PROT_EXEC); -// #else -// void* p = mmap(NULL, size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); -// if(p==(void*)-1) { -// dynarec_log(LOG_INFO, "Cannot create dynamic map of %d bytes\n", size); -// return 0; -// } -// #endif -// setProtection((uintptr_t)p, size, PROT_READ | PROT_WRITE | PROT_EXEC); -// kh_dynablocks_t *blocks = dblist_oversized; -// if(!blocks) { -// blocks = dblist_oversized = kh_init(dynablocks); -// kh_resize(dynablocks, blocks, 64); -// } -// khint_t k; -// int ret; -// k = kh_put(dynablocks, blocks, (uintptr_t)p, &ret); -// kh_value(blocks, k) = db; -// return (uintptr_t)p; -// } -// -// if(pthread_mutex_trylock(&mutex_mmap)) { -// sched_yield(); // give it a chance -// if(pthread_mutex_trylock(&mutex_mmap)) -// return 0; // cannot lock, baillout -// } -// -// uintptr_t 
ret = FindFreeDynarecMap(db, size); -// if(!ret) -// ret = AddNewDynarecMap(db, size); -// -// pthread_mutex_unlock(&mutex_mmap); -// -// return ret; -//} - -//void FreeDynarecMap(dynablock_t* db, uintptr_t addr, uint32_t size) -//{ -// if(size>MMAPSIZE-2*sizeof(blockmark_t)) { -// #ifndef USE_MMAP -// free((void*)addr); -// #else -// munmap((void*)addr, size); -// #endif -// kh_dynablocks_t *blocks = dblist_oversized; -// if(blocks) { -// khint_t k = kh_get(dynablocks, blocks, addr); -// if(k!=kh_end(blocks)) -// kh_del(dynablocks, blocks, k); -// } -// return; -// } -// pthread_mutex_lock(&mutex_mmap); -// ActuallyFreeDynarecMap(db, addr, size); -// pthread_mutex_unlock(&mutex_mmap); -//} - -//dynablocklist_t* getDB(uintptr_t idx) -//{ -// return dynmap[idx]; -//} +typedef struct mmaplist_s { + void* block; + int maxfree; + size_t size; + kh_dynablocks_t* dblist; + uint8_t* helper; +} mmaplist_t; + +uintptr_t FindFreeDynarecMap(dynablock_t* db, int size) +{ + // look for free space + void* sub = NULL; + for(int i=0; i<mmapsize; ++i) { + if(mmaplist[i].maxfree>=size) { + int rsize = 0; + sub = getFirstBlock(mmaplist[i].block, size, &rsize); + if(sub) { + uintptr_t ret = (uintptr_t)allocBlock(mmaplist[i].block, sub, size); + if(rsize==mmaplist[i].maxfree) + mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size); + kh_dynablocks_t *blocks = mmaplist[i].dblist; + if(!blocks) { + blocks = mmaplist[i].dblist = kh_init(dynablocks); + kh_resize(dynablocks, blocks, 64); + } + khint_t k; + int r; + k = kh_put(dynablocks, blocks, (uintptr_t)ret, &r); + kh_value(blocks, k) = db; + for(int j=0; j<size; ++j) + mmaplist[i].helper[(uintptr_t)ret-(uintptr_t)mmaplist[i].block+j] = (j<256)?j:255; + return ret; + } + } + } + return 0; +} + +uintptr_t AddNewDynarecMap(dynablock_t* db, int size) +{ + int i = mmapsize++; + dynarec_log(LOG_DEBUG, "Ask for DynaRec Block Alloc #%d\n", mmapsize); + mmaplist = (mmaplist_t*)realloc(mmaplist, mmapsize*sizeof(mmaplist_t)); + #ifndef USE_MMAP + void *p = NULL; + if(posix_memalign(&p, box64_pagesize, MMAPSIZE)) { + dynarec_log(LOG_INFO, "Cannot create memory map of %d byte for dynarec block #%d\n", MMAPSIZE, i); + --mmapsize; + return 0; + } + mprotect(p, MMAPSIZE, PROT_READ | PROT_WRITE | PROT_EXEC); + #else + void* p = mmap(NULL, MMAPSIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0); + if(p==(void*)-1) { + dynarec_log(LOG_INFO, "Cannot create memory map of %d byte for dynarec block #%d\n", MMAPSIZE, i); + --mmapsize; + return 0; + } + #endif + setProtection((uintptr_t)p, MMAPSIZE, PROT_READ | PROT_WRITE | PROT_EXEC); + + mmaplist[i].block = p; + mmaplist[i].size = MMAPSIZE; + mmaplist[i].helper = (uint8_t*)calloc(1, MMAPSIZE); + // setup marks + blockmark_t* m = (blockmark_t*)p; + m->prev.x32 = 0; + m->next.fill = 0; + m->next.size = MMAPSIZE-sizeof(blockmark_t); + m = (blockmark_t*)(p+MMAPSIZE-sizeof(blockmark_t)); + m->next.x32 = 0; + m->prev.fill = 0; + m->prev.size = MMAPSIZE-sizeof(blockmark_t); + // alloc 1st block + uintptr_t sub = (uintptr_t)allocBlock(mmaplist[i].block, p, size); + mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size); + kh_dynablocks_t *blocks = mmaplist[i].dblist = kh_init(dynablocks); + kh_resize(dynablocks, blocks, 64); + khint_t k; + int ret; + k = kh_put(dynablocks, blocks, (uintptr_t)sub, &ret); + kh_value(blocks, k) = db; + for(int j=0; j<size; ++j) + mmaplist[i].helper[(uintptr_t)sub-(uintptr_t)mmaplist[i].block + j] = (j<256)?j:255; + return sub; +} + +void 
ActuallyFreeDynarecMap(dynablock_t* db, uintptr_t addr, int size) +{ + if(!addr || !size) + return; + for(int i=0; i<mmapsize; ++i) { + if ((addr>(uintptr_t)mmaplist[i].block) + && (addr<((uintptr_t)mmaplist[i].block+mmaplist[i].size))) { + void* sub = (void*)(addr-sizeof(blockmark_t)); + freeBlock(mmaplist[i].block, sub); + mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size); + kh_dynablocks_t *blocks = mmaplist[i].dblist; + if(blocks) { + khint_t k = kh_get(dynablocks, blocks, (uintptr_t)sub); + if(k!=kh_end(blocks)) + kh_del(dynablocks, blocks, k); + for(int j=0; j<size; ++j) + mmaplist[i].helper[(uintptr_t)sub-(uintptr_t)mmaplist[i].block+j] = 0; + } + return; + } + } + if(mmapsize) + dynarec_log(LOG_NONE, "Warning, block %p (size %d) not found in mmaplist for Free\n", (void*)addr, size); +} + +dynablock_t* FindDynablockFromNativeAddress(void* addr) +{ + // look in actual list + for(int i=0; i<mmapsize; ++i) { + if ((uintptr_t)addr>=(uintptr_t)mmaplist[i].block + && ((uintptr_t)addr<(uintptr_t)mmaplist[i].block+mmaplist[i].size)) { + if(!mmaplist[i].helper) + return FindDynablockDynablocklist(addr, mmaplist[i].dblist); + else { + uintptr_t p = (uintptr_t)addr - (uintptr_t)mmaplist[i].block; + while(mmaplist[i].helper[p]) p -= mmaplist[i].helper[p]; + khint_t k = kh_get(dynablocks, mmaplist[i].dblist, (uintptr_t)mmaplist[i].block + p); + if(k!=kh_end(mmaplist[i].dblist)) + return kh_value(mmaplist[i].dblist, k); + return NULL; + } + } + } + // look in oversized + return FindDynablockDynablocklist(addr, dblist_oversized); +} + +uintptr_t AllocDynarecMap(dynablock_t* db, int size) +{ + if(!size) + return 0; + if(size>MMAPSIZE-2*sizeof(blockmark_t)) { + #ifndef USE_MMAP + void *p = NULL; + if(posix_memalign(&p, box64_pagesize, size)) { + dynarec_log(LOG_INFO, "Cannot create dynamic map of %d bytes\n", size); + return 0; + } + mprotect(p, size, PROT_READ | PROT_WRITE | PROT_EXEC); + #else + void* p = mmap(NULL, size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0); + if(p==(void*)-1) { + dynarec_log(LOG_INFO, "Cannot create dynamic map of %d bytes\n", size); + return 0; + } + #endif + setProtection((uintptr_t)p, size, PROT_READ | PROT_WRITE | PROT_EXEC); + kh_dynablocks_t *blocks = dblist_oversized; + if(!blocks) { + blocks = dblist_oversized = kh_init(dynablocks); + kh_resize(dynablocks, blocks, 64); + } + khint_t k; + int ret; + k = kh_put(dynablocks, blocks, (uintptr_t)p, &ret); + kh_value(blocks, k) = db; + return (uintptr_t)p; + } + + if(pthread_mutex_trylock(&mutex_mmap)) { + sched_yield(); // give it a chance + if(pthread_mutex_trylock(&mutex_mmap)) + return 0; // cannot lock, baillout + } + + uintptr_t ret = FindFreeDynarecMap(db, size); + if(!ret) + ret = AddNewDynarecMap(db, size); + + pthread_mutex_unlock(&mutex_mmap); + + return ret; +} + +void FreeDynarecMap(dynablock_t* db, uintptr_t addr, uint32_t size) +{ + if(size>MMAPSIZE-2*sizeof(blockmark_t)) { + #ifndef USE_MMAP + free((void*)addr); + #else + munmap((void*)addr, size); + #endif + kh_dynablocks_t *blocks = dblist_oversized; + if(blocks) { + khint_t k = kh_get(dynablocks, blocks, addr); + if(k!=kh_end(blocks)) + kh_del(dynablocks, blocks, k); + } + return; + } + pthread_mutex_lock(&mutex_mmap); + ActuallyFreeDynarecMap(db, addr, size); + pthread_mutex_unlock(&mutex_mmap); +} + +dynablocklist_t* getDB(uintptr_t idx) +{ + // already 16bits shifted + uintptr_t idx3 = (idx>>32)&((1<<DYNAMAP_SHIFT)-1); + uintptr_t idx2 = (idx>>16)&((1<<DYNAMAP_SHIFT)-1); + uintptr_t idx1 = (idx 
)&((1<<DYNAMAP_SHIFT)-1); + + if(!dynmap123[idx3]) + return NULL; + if(!dynmap123[idx3][idx2]) + return NULL; + return dynmap123[idx3][idx2][idx1]; +} // each dynmap is 64k of size -//void addDBFromAddressRange(uintptr_t addr, uintptr_t size) -//{ -// dynarec_log(LOG_DEBUG, "addDBFromAddressRange %p -> %p\n", (void*)addr, (void*)(addr+size-1)); -// uintptr_t idx = (addr>>DYNAMAP_SHIFT); -// uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT); -// for (uintptr_t i=idx; i<=end; ++i) { -// if(!dynmap[i]) { -// dynmap[i] = NewDynablockList(i<<DYNAMAP_SHIFT, 1<<DYNAMAP_SHIFT, 0); -// } -// } -//} - -//void cleanDBFromAddressRange(uintptr_t addr, uintptr_t size, int destroy) -//{ -// dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p -> %p %s\n", (void*)addr, (void*)(addr+size-1), destroy?"destroy":"mark"); -// uintptr_t idx = (addr>>DYNAMAP_SHIFT); -// uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT); -// for (uintptr_t i=idx; i<=end; ++i) { -// dynablocklist_t* dblist = dynmap[i]; -// if(dblist) { -// if(destroy) -// FreeRangeDynablock(dblist, addr, size); -// else -// MarkRangeDynablock(dblist, addr, size); -// } -// } -//} - -#ifdef ARM -//void arm_next(void); +void addDBFromAddressRange(uintptr_t addr, uintptr_t size) +{ + dynarec_log(LOG_DEBUG, "addDBFromAddressRange %p -> %p\n", (void*)addr, (void*)(addr+size-1)); + uintptr_t idx = (addr>>DYNAMAP_SHIFT); + uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT); + for (uintptr_t i=idx; i<=end; ++i) { + int idx3 = (i>>32)&((1<<DYNAMAP_SHIFT)-1); + int idx2 = (i>>16)&((1<<DYNAMAP_SHIFT)-1); + int idx1 = (i )&((1<<DYNAMAP_SHIFT)-1); + if(!dynmap123[idx3]) + dynmap123[idx3] = (dynablocklist_t***)calloc(1<<DYNAMAP_SHIFT, sizeof(dynablocklist_t**)); + if(!dynmap123[idx3][idx2]) + dynmap123[idx3][idx2] = (dynablocklist_t**)calloc(1<<DYNAMAP_SHIFT, sizeof(dynablocklist_t*)); + if(!dynmap123[idx3][idx2][idx1]) + dynmap123[idx3][idx2][idx1] = NewDynablockList(i<<DYNAMAP_SHIFT, 1<<DYNAMAP_SHIFT, 0); + } +} + +void cleanDBFromAddressRange(uintptr_t addr, uintptr_t size, int destroy) +{ + dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p -> %p %s\n", (void*)addr, (void*)(addr+size-1), destroy?"destroy":"mark"); + uintptr_t idx = (addr>>DYNAMAP_SHIFT); + uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT); + for (uintptr_t i=idx; i<=end; ++i) { + int idx3 = (i>>32)&((1<<DYNAMAP_SHIFT)-1); + int idx2 = (i>>16)&((1<<DYNAMAP_SHIFT)-1); + int idx1 = (i )&((1<<DYNAMAP_SHIFT)-1); + if(dynmap123[idx3] && dynmap123[idx3][idx2]) { + dynablocklist_t* dblist = dynmap123[idx3][idx2][idx1]; + if(dblist) { + if(destroy) + FreeRangeDynablock(dblist, addr, size); + else + MarkRangeDynablock(dblist, addr, size); + } + } + } +} + +#ifdef ARM64 +void arm64_next(void); #endif -//void addJumpTableIfDefault(void* addr, void* jmp) -//{ -// const uintptr_t idx = ((uintptr_t)addr>>JMPTABL_SHIFT); -// if(box64_jumptable[idx] == box64_jmptbl_default) { -// uintptr_t* tbl = (uintptr_t*)malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t)); -// for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) -// tbl[i] = (uintptr_t)arm_next; -// box64_jumptable[idx] = tbl; -// } -// const uintptr_t off = (uintptr_t)addr&((1<<JMPTABL_SHIFT)-1); -// if(box64_jumptable[idx][off]==(uintptr_t)arm_next) -// box64_jumptable[idx][off] = (uintptr_t)jmp; -//} -//void setJumpTableDefault(void* addr) -//{ -// const uintptr_t idx = ((uintptr_t)addr>>JMPTABL_SHIFT); -// if(box64_jumptable[idx] == box64_jmptbl_default) { -// return; -// } -// const uintptr_t off = (uintptr_t)addr&((1<<JMPTABL_SHIFT)-1); -// box64_jumptable[idx][off] = 
(uintptr_t)arm_next; -//} -//uintptr_t getJumpTable() -//{ -// return (uintptr_t)box64_jumptable; -//} - -//uintptr_t getJumpTableAddress(uintptr_t addr) -//{ -// const uintptr_t idx = ((uintptr_t)addr>>JMPTABL_SHIFT); -// if(box64_jumptable[idx] == box64_jmptbl_default) { -// uintptr_t* tbl = (uintptr_t*)malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t)); -// for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) -// tbl[i] = (uintptr_t)arm_next; -// box64_jumptable[idx] = tbl; -// } -// const uintptr_t off = (uintptr_t)addr&((1<<JMPTABL_SHIFT)-1); -// return (uintptr_t)&box64_jumptable[idx][off]; -//} +void addJumpTableIfDefault64(void* addr, void* jmp) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = (((uintptr_t)addr)>>48)&0xffff; + idx2 = (((uintptr_t)addr)>>32)&0xffff; + idx1 = (((uintptr_t)addr)>>16)&0xffff; + idx0 = (((uintptr_t)addr) )&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { + uintptr_t*** tbl = (uintptr_t***)malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t**)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault1; + box64_jmptbl3[idx3] = tbl; + } + if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) { + uintptr_t** tbl = (uintptr_t**)malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t*)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault0; + box64_jmptbl3[idx3][idx2] = tbl; + } + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) { + uintptr_t* tbl = (uintptr_t*)malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = (uintptr_t)arm64_next; + box64_jmptbl3[idx3][idx2][idx1] = tbl; + } + + if(box64_jmptbl3[idx3][idx2][idx1][idx0]==(uintptr_t)arm64_next) + box64_jmptbl3[idx3][idx2][idx1][idx0] = (uintptr_t)jmp; +} +void setJumpTableDefault64(void* addr) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = (((uintptr_t)addr)>>48)&0xffff; + idx2 = (((uintptr_t)addr)>>32)&0xffff; + idx1 = (((uintptr_t)addr)>>16)&0xffff; + idx0 = (((uintptr_t)addr) )&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) + return; + if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) + return; + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) + return; + if(box64_jmptbl3[idx3][idx2][idx1][idx0]==(uintptr_t)arm64_next) + return; + box64_jmptbl3[idx3][idx2][idx1][idx0] = (uintptr_t)arm64_next; +} +uintptr_t getJumpTable64() +{ + return (uintptr_t)box64_jmptbl3; +} + +uintptr_t getJumpTableAddress64(uintptr_t addr) +{ + uintptr_t idx3, idx2, idx1, idx0; + idx3 = (((uintptr_t)addr)>>48)&0xffff; + idx2 = (((uintptr_t)addr)>>32)&0xffff; + idx1 = (((uintptr_t)addr)>>16)&0xffff; + idx0 = (((uintptr_t)addr) )&0xffff; + if(box64_jmptbl3[idx3] == box64_jmptbldefault2) { + uintptr_t*** tbl = (uintptr_t***)malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t**)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault1; + box64_jmptbl3[idx3] = tbl; + } + if(box64_jmptbl3[idx3][idx2] == box64_jmptbldefault1) { + uintptr_t** tbl = (uintptr_t**)malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t*)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = box64_jmptbldefault0; + box64_jmptbl3[idx3][idx2] = tbl; + } + if(box64_jmptbl3[idx3][idx2][idx1] == box64_jmptbldefault0) { + uintptr_t* tbl = (uintptr_t*)malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t)); + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) + tbl[i] = (uintptr_t)arm64_next; + box64_jmptbl3[idx3][idx2][idx1] = tbl; + } + + return (uintptr_t)&box64_jmptbl3[idx3][idx2][idx1][idx0]; +} // Remove the Write flag from an adress range, so DB can be executed // no log, as it can be 
executed inside a signal handler -//void protectDB(uintptr_t addr, uintptr_t size) -//{ -// dynarec_log(LOG_DEBUG, "protectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1)); -// uintptr_t idx = (addr>>MEMPROT_SHIFT); -// uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT); -// pthread_mutex_lock(&mutex_prot); -// for (uintptr_t i=idx; i<=end; ++i) { -// uint32_t prot = memprot[i]; -// if(!prot) -// prot = PROT_READ | PROT_WRITE; // comes from malloc & co, so should not be able to execute -// memprot[i] = prot|PROT_DYNAREC; -// if(!(prot&PROT_DYNAREC)) -// mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE); -// } -// pthread_mutex_unlock(&mutex_prot); -//} - -//void protectDBnolock(uintptr_t addr, uintptr_t size) -//{ -// dynarec_log(LOG_DEBUG, "protectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1)); -// uintptr_t idx = (addr>>MEMPROT_SHIFT); -// uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT); -// for (uintptr_t i=idx; i<=end; ++i) { -// uint32_t prot = memprot[i]; -// if(!prot) -// prot = PROT_READ | PROT_WRITE; // comes from malloc & co, so should not be able to execute -// memprot[i] = prot|PROT_DYNAREC; -// if(!(prot&PROT_DYNAREC)) -// mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE); -// } -//} - -//void lockDB() -//{ -// pthread_mutex_lock(&mutex_prot); -//} - -//void unlockDB() -//{ -// pthread_mutex_unlock(&mutex_prot); -//} +void protectDB(uintptr_t addr, uintptr_t size) +{ + dynarec_log(LOG_DEBUG, "protectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1)); + uintptr_t idx = (addr>>MEMPROT_SHIFT); + uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT); + int ret; + pthread_mutex_lock(&mutex_prot); + for (uintptr_t i=idx; i<=end; ++i) { + const uint32_t key = (i>>16)&0xffffffff; + khint_t k = kh_put(memprot, memprot, key, &ret); + if(ret) { + uint8_t *m = (uint8_t*)calloc(1, MEMPROT_SIZE); + kh_value(memprot, k) = m; + } + const uintptr_t ii = i&(MEMPROT_SIZE-1); + uint8_t prot = kh_value(memprot, k)[ii]; + if(!prot) + prot = PROT_READ | PROT_WRITE; // comes from malloc & co, so should not be able to execute + kh_value(memprot, k)[ii] = prot|PROT_DYNAREC; + if(!(prot&PROT_DYNAREC)) + mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE); + } + pthread_mutex_unlock(&mutex_prot); +} + +void protectDBnolock(uintptr_t addr, uintptr_t size) +{ + dynarec_log(LOG_DEBUG, "protectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1)); + uintptr_t idx = (addr>>MEMPROT_SHIFT); + uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT); + int ret; + for (uintptr_t i=idx; i<=end; ++i) { + const uint32_t key = (i>>16)&0xffffffff; + khint_t k = kh_put(memprot, memprot, key, &ret); + if(ret) { + uint8_t *m = (uint8_t*)calloc(1, MEMPROT_SIZE); + kh_value(memprot, k) = m; + } + const uintptr_t ii = i&(MEMPROT_SIZE-1); + uint8_t prot = kh_value(memprot, k)[ii]; + if(!prot) + prot = PROT_READ | PROT_WRITE; // comes from malloc & co, so should not be able to execute + kh_value(memprot, k)[ii] = prot|PROT_DYNAREC; + if(!(prot&PROT_DYNAREC)) + mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE); + } +} + +void lockDB() +{ + pthread_mutex_lock(&mutex_prot); +} + +void unlockDB() +{ + pthread_mutex_unlock(&mutex_prot); +} // Add the Write flag from an adress range, and mark all block as dirty // no log, as it can be executed inside a signal handler -//void unprotectDB(uintptr_t addr, uintptr_t size) -//{ -// dynarec_log(LOG_DEBUG, "unprotectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1)); -// uintptr_t idx = (addr>>MEMPROT_SHIFT); -// uintptr_t 
end = ((addr+size-1)>>MEMPROT_SHIFT); -// pthread_mutex_lock(&mutex_prot); -// for (uintptr_t i=idx; i<=end; ++i) { -// uint32_t prot = memprot[i]; -// memprot[i] = prot&~PROT_DYNAREC; -// if(prot&PROT_DYNAREC) { -// mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_DYNAREC); -// cleanDBFromAddressRange((i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, 0); -// } -// } -// pthread_mutex_unlock(&mutex_prot); -//} +void unprotectDB(uintptr_t addr, uintptr_t size) +{ + dynarec_log(LOG_DEBUG, "unprotectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1)); + uintptr_t idx = (addr>>MEMPROT_SHIFT); + uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT); + int ret; + pthread_mutex_lock(&mutex_prot); + for (uintptr_t i=idx; i<=end; ++i) { + const uint32_t key = (i>>16)&0xffffffff; + khint_t k = kh_put(memprot, memprot, key, &ret); + if(ret) { + uint8_t *m = (uint8_t*)calloc(1, MEMPROT_SIZE); + kh_value(memprot, k) = m; + } + const uintptr_t ii = i&(MEMPROT_SIZE-1); + uint8_t prot = kh_value(memprot, k)[ii]; + kh_value(memprot, k)[ii] = prot&~PROT_DYNAREC; + if(prot&PROT_DYNAREC) { + mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_DYNAREC); + cleanDBFromAddressRange((i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, 0); + } + } + pthread_mutex_unlock(&mutex_prot); +} #endif @@ -701,11 +787,14 @@ void init_custommem_helper(box64context_t* ctx) pthread_mutex_init(&mutex_prot, NULL); #ifdef DYNAREC pthread_mutex_init(&mutex_mmap, NULL); -#ifdef ARM -// for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) -// box64_jmptbl_default[i] = (uintptr_t)arm_next; -// for(int i=0; i<JMPTABL_SIZE; ++i) -// box64_jumptable[i] = box64_jmptbl_default; +#ifdef ARM64 + if(box64_dynarec) + for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) { + box64_jmptbldefault0[i] = (uintptr_t)arm64_next; + box64_jmptbldefault1[i] = box64_jmptbldefault0; + box64_jmptbldefault2[i] = box64_jmptbldefault1; + box64_jmptbl3[i] = box64_jmptbldefault2; + } #else #error Unsupported architecture! 
#endif @@ -718,51 +807,57 @@ void fini_custommem_helper(box64context_t *ctx) return; inited = 0; #ifdef DYNAREC -// dynarec_log(LOG_DEBUG, "Free global Dynarecblocks\n"); -// for (int i=0; i<mmapsize; ++i) { -// if(mmaplist[i].block) -// #ifdef USE_MMAP -// munmap(mmaplist[i].block, mmaplist[i].size); -// #else -// free(mmaplist[i].block); -// #endif -// if(mmaplist[i].dblist) { -// kh_destroy(dynablocks, mmaplist[i].dblist); -// mmaplist[i].dblist = NULL; -// } -// if(mmaplist[i].helper) { -// free(mmaplist[i].helper); -// mmaplist[i].helper = NULL; -// } -// } -// if(dblist_oversized) { -// kh_destroy(dynablocks, dblist_oversized); -// dblist_oversized = NULL; -// } -// mmapsize = 0; -// dynarec_log(LOG_DEBUG, "Free dynamic Dynarecblocks\n"); -// uintptr_t idx = 0; -// uintptr_t end = ((0xFFFFFFFF)>>DYNAMAP_SHIFT); -// for (uintptr_t i=idx; i<=end; ++i) { -// dynablocklist_t* dblist = dynmap[i]; -// if(dblist) { -// uintptr_t startdb = StartDynablockList(dblist); -// uintptr_t enddb = EndDynablockList(dblist); -// uintptr_t startaddr = 0; -// if(startaddr<startdb) startaddr = startdb; -// uintptr_t endaddr = 0xFFFFFFFF; -// if(endaddr>enddb) endaddr = enddb; -// FreeRangeDynablock(dblist, startaddr, endaddr-startaddr+1); -// } -// } -// for (uintptr_t i=idx; i<=end; ++i) -// if(dynmap[i]) -// FreeDynablockList(&dynmap[i]); -// pthread_mutex_destroy(&mutex_mmap); -// free(mmaplist); -// for (int i=0; i<DYNAMAP_SIZE; ++i) -// if(box64_jumptable[i]!=box64_jmptbl_default) -// free(box64_jumptable[i]); + if(box64_dynarec) { + dynarec_log(LOG_DEBUG, "Free global Dynarecblocks\n"); + for (int i=0; i<mmapsize; ++i) { + if(mmaplist[i].block) + #ifdef USE_MMAP + munmap(mmaplist[i].block, mmaplist[i].size); + #else + free(mmaplist[i].block); + #endif + if(mmaplist[i].dblist) { + kh_destroy(dynablocks, mmaplist[i].dblist); + mmaplist[i].dblist = NULL; + } + if(mmaplist[i].helper) { + free(mmaplist[i].helper); + mmaplist[i].helper = NULL; + } + } + if(dblist_oversized) { + kh_destroy(dynablocks, dblist_oversized); + dblist_oversized = NULL; + } + mmapsize = 0; + dynarec_log(LOG_DEBUG, "Free dynamic Dynarecblocks\n"); + for (uintptr_t idx3=0; idx3<=0xffff; ++idx3) + if(dynmap123[idx3]) { + for (uintptr_t idx2=0; idx2<=0xffff; ++idx2) + if(dynmap123[idx3][idx2]) { + for (uintptr_t idx1=0; idx1<=0xffff; ++idx1) + if(dynmap123[idx3][idx2][idx1]) + FreeDynablockList(&dynmap123[idx3][idx2][idx1]); + free(dynmap123[idx3][idx2]); + } + free(dynmap123[idx3]); + } + + free(mmaplist); + pthread_mutex_destroy(&mutex_mmap); + for (int i3=0; i3<(1<<DYNAMAP_SHIFT); ++i3) + if(box64_jmptbl3[i3]!=box64_jmptbldefault2) { + for (int i2=0; i2<(1<<DYNAMAP_SHIFT); ++i2) + if(box64_jmptbl3[i3][i2]!=box64_jmptbldefault1) { + for (int i1=0; i1<(1<<DYNAMAP_SHIFT); ++i1) + if(box64_jmptbl3[i3][i2][i1]!=box64_jmptbldefault0) { + free(box64_jmptbl3[i3][i2][i1]); + } + free(box64_jmptbl3[i3][i2]); + } + free(box64_jmptbl3[i3]); + } + } #endif uint8_t* m; kh_foreach_value(memprot, m, diff --git a/src/dynarec/arm64_epilog.S b/src/dynarec/arm64_epilog.S new file mode 100755 index 00000000..af39c1ba --- /dev/null +++ b/src/dynarec/arm64_epilog.S @@ -0,0 +1,81 @@ +//arm epilog for dynarec +//Save stuff, prepare stack and register +//called with pointer to emu as 1st parameter +//and address to jump to as 2nd parameter + +.text +.align 4 + +.global arm64_epilog +arm64_epilog: + //update register -> emu + str x10, [x0, (8 * 0)] + str x11, [x0, (8 * 1)] + str x12, [x0, (8 * 2)] + str x13, [x0, (8 * 3)] + str x14, [x0, (8 * 4)] + str x15, 
[x0, (8 * 5)] + str x16, [x0, (8 * 6)] + str x17, [x0, (8 * 7)] + str x18, [x0, (8 * 8)] + str x19, [x0, (8 * 9)] + str x20, [x0, (8 * 10)] + str x21, [x0, (8 * 11)] + str x22, [x0, (8 * 12)] + str x23, [x0, (8 * 13)] + str x24, [x0, (8 * 14)] + str x25, [x0, (8 * 15)] + str x26, [x0, (8 * 16)] + str x27, [x0, (8 * 17)] // put back reg value in emu, including EIP (so x25 must be EIP now) + //restore all used register + //vpop {d8-d15} + ldr x10, [sp, (8 * 0)] + ldr x11, [sp, (8 * 1)] + ldr x12, [sp, (8 * 2)] + ldr x13, [sp, (8 * 3)] + ldr x14, [sp, (8 * 4)] + ldr x15, [sp, (8 * 5)] + ldr x16, [sp, (8 * 6)] + ldr x17, [sp, (8 * 7)] + ldr x18, [sp, (8 * 8)] + ldr x19, [sp, (8 * 9)] + ldr x20, [sp, (8 * 10)] + ldr x21, [sp, (8 * 11)] + ldr x22, [sp, (8 * 12)] + ldr x23, [sp, (8 * 13)] + ldr x24, [sp, (8 * 14)] + ldr x25, [sp, (8 * 15)] + ldr x26, [sp, (8 * 16)] + ldr x27, [sp, (8 * 17)] + add sp, sp, (8 * 18) + ldp lr, fp, [sp, 16]! // saved lr + //end, return... + ret + + +.global arm64_epilog_fast +arm64_epilog_fast: + //restore all used register + //vpop {d8-d15} + ldr x8, [sp, (8 * 0)] + ldr x9, [sp, (8 * 1)] + ldr x10, [sp, (8 * 2)] + ldr x11, [sp, (8 * 3)] + ldr x12, [sp, (8 * 4)] + ldr x13, [sp, (8 * 5)] + ldr x14, [sp, (8 * 6)] + ldr x15, [sp, (8 * 7)] + ldr x16, [sp, (8 * 8)] + ldr x17, [sp, (8 * 9)] + ldr x18, [sp, (8 * 10)] + ldr x19, [sp, (8 * 11)] + ldr x20, [sp, (8 * 12)] + ldr x21, [sp, (8 * 13)] + ldr x22, [sp, (8 * 14)] + ldr x23, [sp, (8 * 15)] + ldr x24, [sp, (8 * 16)] + ldr x25, [sp, (8 * 17)] + add sp, sp, (8 * 18) + ldp lr, fp, [sp, 16]! // saved lr + //end, return... + ret diff --git a/src/dynarec/arm64_lock_helper.S b/src/dynarec/arm64_lock_helper.S new file mode 100755 index 00000000..51b43316 --- /dev/null +++ b/src/dynarec/arm64_lock_helper.S @@ -0,0 +1,87 @@ +//arm lock helper +//there is 2 part: read and write +// write return 0 on success, 1 on fail (value has been changed) + +.text +.align 4 + +.global arm64_lock_read_b +.global arm64_lock_write_b +.global arm64_lock_read_h +.global arm64_lock_write_h +.global arm64_lock_read_d +.global arm64_lock_write_d +.global arm64_lock_read_dd +.global arm64_lock_write_dd +.global arm64_lock_xchg +.global arm64_lock_storeifnull + + +arm64_lock_read_b: + // address is x0, return is x0 + ldaxrb w0, [x0] + ret + +arm64_lock_write_b: + // address is x0, value is x1, return is x0 + mov x2, x0 + stlxrb w0, w1, [x2] + ret + +arm64_lock_read_h: + // address is x0, return is x0 + ldaxrh w0, [x0] + ret + +arm64_lock_write_h: + // address is x0, value is x1, return is x0 + mov x2, x0 + stlxrh w0, w1, [x2] + ret + +arm64_lock_read_d: + // address is x0, return is x0 + #ldaxr w0, [x0] + ldr w0,[x0] + ret + +arm64_lock_write_d: + // address is x0, value is w1, return is x0 + mov x2, x0 + #stlxr w0, w1, [x2] + str w1, [x2] + mov w0, 0 + ret + +arm64_lock_read_dd: + // address is x0, return is x0 + ldaxr x0, [x0] + ret + +arm64_lock_write_dd: + // address is x0, value is x1, return is x0 + mov x2, x0 + stlxr w0, x1, [x2] + ret + +arm64_lock_xchg: + // address is x0, value is x1, return old value in x0 + ldaxr w2, [x0] + stlxr w3, w1, [x0] + cmp w3, #1 + beq arm64_lock_xchg + mov w0, w2 + ret + +arm64_lock_storeifnull: + // address is x0, value is x1, x1 store to x0 only if [x0] is 0. 
return new [x0] value (so x1 or old value) + ldaxr x2, [x0] + cmp x2, #0 + bne arm64_lock_storeifnull_exit + mov x2, x1 + stlxr w3, x2, [x0] + cmp w3, #1 + beq arm64_lock_storeifnull +arm64_lock_storeifnull_exit: + mov x0, x2 + ret diff --git a/src/dynarec/arm64_lock_helper.h b/src/dynarec/arm64_lock_helper.h new file mode 100755 index 00000000..a6879bea --- /dev/null +++ b/src/dynarec/arm64_lock_helper.h @@ -0,0 +1,31 @@ +#ifndef __ARM64_LOCK_HELPER__H__ +#define __ARM64_LOCK_HELPER__H__ +#include <stdint.h> + +// LDAXRB of ADDR +extern uint8_t arm64_lock_read_b(void* addr); +// STLXRB of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_b(void* addr, uint8_t val); + +// LDAXRH of ADDR +extern uint16_t arm64_lock_read_h(void* addr); +// STLXRH of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_h(void* addr, uint16_t val); + +// LDAXR of ADDR +extern uint32_t arm64_lock_read_d(void* addr); +// STLXR of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_d(void* addr, uint32_t val); + +// LDAXRD of ADDR +extern uint64_t arm64_lock_read_dd(void* addr); +// STLXR of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_dd(void* addr, uint64_t val); + +// Atomicaly exchange value at [p] with val, return old p +extern uintptr_t arm64_lock_xchg(void* p, uintptr_t val); + +// Atomicaly store value to [p] only if [p] is NULL. Return new [p] value (so val or old) +extern void* arm64_lock_storeifnull(void*p, void* val); + +#endif //__ARM64_LOCK_HELPER__H__ \ No newline at end of file diff --git a/src/dynarec/arm64_next.S b/src/dynarec/arm64_next.S new file mode 100755 index 00000000..2410750c --- /dev/null +++ b/src/dynarec/arm64_next.S @@ -0,0 +1,47 @@ +//arm update linker table for dynarec +//called with pointer to emu as 1st parameter +//and address of table to as 2nd parameter +//ip is at r12 + +.text +.align 4 + +.extern LinkNext + +.global arm64_next +arm64_next: + // emu is r0 + // don't put put back reg value in emu, faster but more tricky to debug + // IP address is r1 + sub sp, sp, (8 * 11) + str x0, [sp, (8 * 0)] + str x1, [sp, (8 * 1)] + str x10, [sp, (8 * 2)] + str x11, [sp, (8 * 3)] + str x12, [sp, (8 * 4)] + str x13, [sp, (8 * 5)] + str x14, [sp, (8 * 6)] + str x15, [sp, (8 * 7)] + str x16, [sp, (8 * 8)] + str x17, [sp, (8 * 9)] + str x18, [sp, (8 * 10)] + // call the function + bl LinkNext + // preserve return value + mov x3, x0 + // pop regs + ldr x0, [sp, (8 * 0)] + ldr x1, [sp, (8 * 1)] + ldr x10, [sp, (8 * 2)] + ldr x11, [sp, (8 * 3)] + ldr x12, [sp, (8 * 4)] + ldr x13, [sp, (8 * 5)] + ldr x14, [sp, (8 * 6)] + ldr x15, [sp, (8 * 7)] + ldr x16, [sp, (8 * 8)] + ldr x17, [sp, (8 * 9)] + ldr x18, [sp, (8 * 10)] + add sp, sp, (8 * 11) + // return offset is jump address + br x3 + diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c new file mode 100755 index 00000000..49539a1a --- /dev/null +++ b/src/dynarec/arm64_printer.c @@ -0,0 +1,14 @@ +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <stdio.h> + +#include "arm64_printer.h" + +const char* arm64_print(uint32_t opcode) +{ + static char buff[200]; + + snprintf(buff, sizeof(buff), "0x%8X ???", opcode); + return buff; +} \ No newline at end of file diff --git a/src/dynarec/arm64_printer.h b/src/dynarec/arm64_printer.h new file mode 100644 index 00000000..6fe21c33 --- /dev/null +++ b/src/dynarec/arm64_printer.h @@ -0,0 +1,6 @@ +#ifndef _ARM_PRINTER_H_ +#define _ARM_PRINTER_H_ + +const char* arm64_print(uint32_t opcode); + +#endif //_ARM_PRINTER_H_ diff --git 
a/src/dynarec/arm64_prolog.S b/src/dynarec/arm64_prolog.S new file mode 100755 index 00000000..f480f2ea --- /dev/null +++ b/src/dynarec/arm64_prolog.S @@ -0,0 +1,53 @@ +//arm prologue for dynarec +//Save stuff, prepare stack and register +//called with pointer to emu as 1st parameter +//and address to jump to as 2nd parameter + +.text +.align 4 + +.global arm64_prolog +arm64_prolog: + //save all 18 used register + stp lr, fp, [sp, 16]! // save lr + sub sp, sp, (8 * 18) + str x10, [sp, (8 * 0)] + str x11, [sp, (8 * 1)] + str x12, [sp, (8 * 2)] + str x13, [sp, (8 * 3)] + str x14, [sp, (8 * 4)] + str x15, [sp, (8 * 5)] + str x16, [sp, (8 * 6)] + str x17, [sp, (8 * 7)] + str x18, [sp, (8 * 8)] + str x19, [sp, (8 * 9)] + str x20, [sp, (8 * 10)] + str x21, [sp, (8 * 11)] + str x22, [sp, (8 * 12)] + str x23, [sp, (8 * 13)] + str x24, [sp, (8 * 14)] + str x25, [sp, (8 * 15)] + str x26, [sp, (8 * 16)] + str x27, [sp, (8 * 17)] + //vpush {d8-d15} // save NEON regs? + //setup emu -> register + ldr x10, [x0, (8 * 0)] + ldr x11, [x0, (8 * 1)] + ldr x12, [x0, (8 * 2)] + ldr x13, [x0, (8 * 3)] + ldr x14, [x0, (8 * 4)] + ldr x15, [x0, (8 * 5)] + ldr x16, [x0, (8 * 6)] + ldr x17, [x0, (8 * 7)] + ldr x18, [x0, (8 * 8)] + ldr x19, [x0, (8 * 9)] + ldr x20, [x0, (8 * 10)] + ldr x21, [x0, (8 * 11)] + ldr x22, [x0, (8 * 12)] + ldr x23, [x0, (8 * 13)] + ldr x24, [x0, (8 * 14)] + ldr x25, [x0, (8 * 15)] + ldr x26, [x0, (8 * 16)] + ldr x27, [x0, (8 * 17)] + //jump to function + br x1 diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c new file mode 100755 index 00000000..2ab39d09 --- /dev/null +++ b/src/dynarec/dynablock.c @@ -0,0 +1,422 @@ +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <errno.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "tools/bridge_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynablock.h" +#include "dynablock_private.h" +#include "dynarec_private.h" +#include "elfloader.h" +#ifdef ARM64 +#include "dynarec_arm64.h" +#include "arm64_lock_helper.h" +#else +#error Unsupported architecture! +#endif +#include "custommem.h" +#include "khash.h" + +KHASH_MAP_INIT_INT(dynablocks, dynablock_t*) + +uint32_t X31_hash_code(void* addr, int len) +{ + if(!len) return 0; + uint8_t* p = (uint8_t*)addr; + int32_t h = *p; + for (--len, ++p; len; --len, ++p) h = (h << 5) - h + (int32_t)*p; + return (uint32_t)h; +} + +dynablocklist_t* NewDynablockList(uintptr_t text, int textsz, int direct) +{ + if(!textsz) { + printf_log(LOG_NONE, "Error, creating a NULL sized Dynablock\n"); + return NULL; + } + dynablocklist_t* ret = (dynablocklist_t*)calloc(1, sizeof(dynablocklist_t)); + ret->text = text; + ret->textsz = textsz; + if(direct && textsz) { + ret->direct = (dynablock_t**)calloc(textsz, sizeof(dynablock_t*)); + if(!ret->direct) {printf_log(LOG_NONE, "Warning, fail to create direct block for dynablock @%p\n", (void*)text);} + } + dynarec_log(LOG_DEBUG, "New Dynablocklist %p, from %p->%p\n", ret, (void*)text, (void*)(text+textsz)); + return ret; +} + +void FreeDynablock(dynablock_t* db) +{ + if(db) { + dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p father=%p, with %d son(s) already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size, db->father, db->sons_size, db->gone); + if(db->gone) + return; // already in the process of deletion! 
+ db->done = 0; + db->gone = 1; + // remove from direct if there + uintptr_t startdb = db->parent->text; + uintptr_t enddb = db->parent->text + db->parent->textsz; + if(db->parent->direct) { + uintptr_t addr = (uintptr_t)db->x64_addr; + if(addr>=startdb && addr<enddb) + db->parent->direct[addr-startdb] = NULL; + } + // remove jumptable + setJumpTableDefault64(db->x64_addr); + // remove and free the sons + for (int i=0; i<db->sons_size; ++i) { + dynablock_t *son = (dynablock_t*)arm64_lock_xchg(&db->sons[i], 0); + FreeDynablock(son); + } + // only the father free the DynarecMap + if(!db->father) { + dynarec_log(LOG_DEBUG, " -- FreeDyrecMap(%p, %d)\n", db->block, db->size); + FreeDynarecMap(db, (uintptr_t)db->block, db->size); + } + free(db->sons); + free(db->instsize); + free(db); + } +} + +void FreeDynablockList(dynablocklist_t** dynablocks) +{ + if(!dynablocks) + return; + if(!*dynablocks) + return; + dynarec_log(LOG_DEBUG, "Free Dynablocklist %p, with Direct Blocks %p\n", *dynablocks, (*dynablocks)->direct); + if((*dynablocks)->direct) { + for (int i=0; i<(*dynablocks)->textsz; ++i) { + if((*dynablocks)->direct[i] && !(*dynablocks)->direct[i]->father) + FreeDynablock((*dynablocks)->direct[i]); + } + free((*dynablocks)->direct); + } + (*dynablocks)->direct = NULL; + + free(*dynablocks); + *dynablocks = NULL; +} + +void MarkDynablock(dynablock_t* db) +{ + if(db) { + if(db->father) + db = db->father; // mark only father + if(db->need_test) + return; // already done + db->need_test = 1; // test only blocks that can be marked (and so deleted) + setJumpTableDefault64(db->x64_addr); + for(int i=0; i<db->sons_size; ++i) + setJumpTableDefault64(db->sons[i]->x64_addr); + } +} + +uintptr_t StartDynablockList(dynablocklist_t* db) +{ + if(db) + return db->text; + return 0; +} +uintptr_t EndDynablockList(dynablocklist_t* db) +{ + if(db) + return db->text+db->textsz-1; + return 0; +} + +int IntervalIntersects(uintptr_t start1, uintptr_t end1, uintptr_t start2, uintptr_t end2) +{ + if(start1 > end2 || start2 > end1) + return 0; + return 1; +} + +void MarkDirectDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size) +{ + if(!dynablocks) + return; + if(!dynablocks->direct) + return; + uintptr_t startdb = dynablocks->text; + uintptr_t enddb = startdb + dynablocks->textsz -1; + uintptr_t start = addr; + uintptr_t end = addr+size-1; + if(start<startdb) + start = startdb; + if(end>enddb) + end = enddb; + dynablock_t *db; + if(end>startdb && start<enddb) + for(uintptr_t i = start; i<end; ++i) + if((db=dynablocks->direct[i-startdb])) + if(IntervalIntersects((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr+db->x64_size-1, addr, addr+size+1)) + MarkDynablock(db); +} + +void FreeRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size) +{ + if(!dynablocks) + return; + + if(dynablocks->direct) { + dynablock_t* db; + int ret; + khint_t k; + kh_dynablocks_t *blocks = kh_init(dynablocks); + // copy in a temporary list + if(dynablocks->direct) { + uintptr_t startdb = dynablocks->text; + uintptr_t enddb = startdb + dynablocks->textsz; + uintptr_t start = addr; + uintptr_t end = addr+size; + if(start<startdb) + start = startdb; + if(end>enddb) + end = enddb; + if(end>startdb && start<enddb) + for(uintptr_t i = start; i<end; ++i) { + db = (dynablock_t*)arm64_lock_xchg(&dynablocks->direct[i-startdb], 0); + if(db) { + if(db->father) + db = db->father; + if(db->parent==dynablocks) { + k = kh_put(dynablocks, blocks, (uintptr_t)db, &ret); + kh_value(blocks, k) = db; + } + } + } + } + // purge 
the list + kh_foreach_value(blocks, db, + FreeDynablock(db); + ); + kh_destroy(dynablocks, blocks); + } +} +void MarkRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size) +{ + if(!dynablocks) + return; + if(dynablocks->direct) { + uintptr_t new_addr = addr - dynablocks->maxsz; + uintptr_t new_size = size + dynablocks->maxsz; + MarkDirectDynablock(dynablocks, new_addr, new_size); + // the blocks check before + for(int idx=(new_addr)>>DYNAMAP_SHIFT; idx<(addr>>DYNAMAP_SHIFT); ++idx) + MarkDirectDynablock(getDB(idx), new_addr, new_size); + } +} + +dynablock_t* FindDynablockDynablocklist(void* addr, kh_dynablocks_t* dynablocks) +{ + if(!dynablocks) + return NULL; + dynablock_t* db; + kh_foreach_value(dynablocks, db, + const uintptr_t s = (uintptr_t)db->block; + const uintptr_t e = (uintptr_t)db->block+db->size; + if((uintptr_t)addr>=s && (uintptr_t)addr<e) + return db->father?db->father:db; + ) + return NULL; +} + +static dynablocklist_t* getDBFromAddress(uintptr_t addr) +{ + const uintptr_t idx = (addr>>DYNAMAP_SHIFT); + return getDB(idx); +} + +dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* created) +{ + if(!dynablocks) { + dynarec_log(LOG_INFO, "Warning: Ask to create a dynablock with a NULL dynablocklist (addr=%p)\n", (void*)addr); + *created = 0; + return NULL; + } + if((addr<dynablocks->text) || (addr>=(dynablocks->text+dynablocks->textsz))) { + // this should be useless + //dynarec_log(LOG_INFO, "Warning: Refused to create a Direct Block that is out-of-bound: dynablocks=%p (%p:%p), addr=%p\n", dynablocks, (void*)(dynablocks->text), (void*)(dynablocks->text+dynablocks->textsz), (void*)addr); + //*created = 0; + //return NULL; + return AddNewDynablock(getDBFromAddress(addr), addr, created); + } + dynablock_t* block = NULL; + // first, check if it exist in direct access mode + if(dynablocks->direct) { + block = dynablocks->direct[addr-dynablocks->text]; + if(block) { + dynarec_log(LOG_DUMP, "Block already exist in Direct Map\n"); + *created = 0; + return block; + } + } + + if (!*created) + return block; + + if(!dynablocks->direct) { + dynablock_t** p = (dynablock_t**)calloc(dynablocks->textsz, sizeof(dynablock_t*)); + if(arm64_lock_storeifnull(&dynablocks->direct, p)!=p) + free(p); // someone already create the direct array, too late... + } + + // create and add new block + dynarec_log(LOG_DUMP, "Ask for DynaRec Block creation @%p\n", (void*)addr); + + block = (dynablock_t*)calloc(1, sizeof(dynablock_t)); + block->parent = dynablocks; + dynablock_t* tmp = (dynablock_t*)arm64_lock_storeifnull(&dynablocks->direct[addr-dynablocks->text], block); + if(tmp != block) { + // a block appeard! + free(block); + *created = 0; + return tmp; + } + + *created = 1; + return block; +} + +/* + return NULL if block is not found / cannot be created. + Don't create if create==0 +*/ +static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr, int create, dynablock_t* current) +{ + // try the quickest way first: get parent of current and check if ok! 
+ dynablocklist_t *dynablocks = NULL; + dynablock_t* block = NULL; + if(current) { + dynablocks = current->parent; + if(dynablocks && !(addr>=dynablocks->text && addr<(dynablocks->text+dynablocks->textsz))) + dynablocks = NULL; + } + // nope, lets do the long way + if(!dynablocks) { + dynablocks = getDBFromAddress(addr); + if(!dynablocks) { + dynablocks = GetDynablocksFromAddress(emu->context, addr); + if(!dynablocks) + return NULL; + } + } + // check direct first, without lock + if(dynablocks->direct/* && (addr>=dynablocks->text) && (addr<(dynablocks->text+dynablocks->textsz))*/) + if((block = dynablocks->direct[addr-dynablocks->text])) + return block; + + int created = create; + block = AddNewDynablock(dynablocks, addr, &created); + if(!created) + return block; // existing block... + + if(box64_dynarec_dump) + pthread_mutex_lock(&my_context->mutex_dyndump); + // fill the block + block->x64_addr = (void*)addr; + if(0/*!FillBlock64(block, filladdr)*/) { + void* old = (void*)arm64_lock_xchg(&dynablocks->direct[addr-dynablocks->text], 0); + if(old!=block && old) {// put it back in place, strange things are happening here! + dynarec_log(LOG_INFO, "Warning, a wild block appeared at %p: %p\n", (void*)addr, old); + arm64_lock_xchg(&dynablocks->direct[addr-dynablocks->text], (uintptr_t)old); + } + free(block); + block = NULL; + } + if(box64_dynarec_dump) + pthread_mutex_unlock(&my_context->mutex_dyndump); + // check size + if(block && block->x64_size) { + int blocksz = block->x64_size; + if(dynablocks->maxsz<blocksz) { + dynablocks->maxsz = blocksz; + for(int idx=(addr>>DYNAMAP_SHIFT)+1; idx<=((addr+blocksz)>>DYNAMAP_SHIFT); ++idx) { + dynablocklist_t* dblist; + if((dblist = getDB(idx))) + if(dblist->maxsz<blocksz) + dblist->maxsz = blocksz; + } + } + lockDB(); + protectDBnolock((uintptr_t)block->x64_addr, block->x64_size); + // fill-in jumptable + addJumpTableIfDefault64(block->x64_addr, block->block); + for(int i=0; i<block->sons_size; ++i) + addJumpTableIfDefault64(block->sons[i]->x64_addr, block->sons[i]->block); + unlockDB(); + } + + dynarec_log(LOG_DEBUG, " --- DynaRec Block %s @%p:%p (%p, 0x%x bytes, with %d son(s))\n", created?"created":"recycled", (void*)addr, (void*)(addr+((block)?block->x64_size:0)), (block)?block->block:0, (block)?block->size:0, (block)?block->sons_size:0); + + return block; +} + +dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, dynablock_t** current) +{ + dynablock_t *db = internalDBGetBlock(emu, addr, addr, create, *current); + if(db && db->done && db->block && (db->need_test || (db->father && db->father->need_test))) { + dynablock_t *father = db->father?db->father:db; + uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size); + if(hash!=father->hash) { + dynarec_log(LOG_DEBUG, "Invalidating block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size, hash, father->hash, father->sons_size, (void*)addr); + // no more current if it gets invalidated too + if(*current && father->x64_addr>=(*current)->x64_addr && (father->x64_addr+father->x64_size)<(*current)->x64_addr) + *current = NULL; + // Free father, it's now invalid! + FreeDynablock(father); + // start again... 
(will create a new block) + db = internalDBGetBlock(emu, addr, addr, create, *current); + } else { + father->need_test = 0; + lockDB(); + protectDBnolock((uintptr_t)father->x64_addr, father->x64_size); + // fill back jumptable + addJumpTableIfDefault64(father->x64_addr, father->block); + for(int i=0; i<father->sons_size; ++i) + addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block); + unlockDB(); + } + } + return db; +} + +dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr) +{ + dynarec_log(LOG_DEBUG, "Creating AlternateBlock at %p for %p\n", (void*)addr, (void*)filladdr); + int create = 1; + dynablock_t *db = internalDBGetBlock(emu, addr, filladdr, create, NULL); + if(db && db->done && db->block && (db->need_test || (db->father && db->father->need_test))) { + dynablock_t *father = db->father?db->father:db; + uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size); + if(hash!=father->hash) { + dynarec_log(LOG_DEBUG, "Invalidating alt block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size, hash, father->hash, father->sons_size, (void*)addr); + // Free father, it's now invalid! + FreeDynablock(father); + // start again... (will create a new block) + db = internalDBGetBlock(emu, addr, filladdr, create, NULL); + } else { + father->need_test = 0; + lockDB(); + protectDBnolock((uintptr_t)father->x64_addr, father->x64_size); + // fill back jumptable + addJumpTableIfDefault64(father->x64_addr, father->block); + for(int i=0; i<father->sons_size; ++i) + addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block); + unlockDB(); + } + } + return db; +} diff --git a/src/dynarec/dynablock_private.h b/src/dynarec/dynablock_private.h new file mode 100755 index 00000000..dd2ee4c0 --- /dev/null +++ b/src/dynarec/dynablock_private.h @@ -0,0 +1,36 @@ +#ifndef __DYNABLOCK_PRIVATE_H_ +#define __DYNABLOCK_PRIVATE_H_ + +typedef struct dynablocklist_s dynablocklist_t; + +typedef struct instsize_s { + unsigned int x64:4; + unsigned int nat:4; +} instsize_t; + +typedef struct dynablock_s { + dynablocklist_t* parent; + void* block; + int size; + void* x64_addr; + uintptr_t x64_size; + uint32_t hash; + uint8_t need_test; + uint8_t done; + uint8_t gone; + uint8_t dummy; + int isize; + dynablock_t** sons; // sons (kind-of dummy dynablock...) 
+ int sons_size; + dynablock_t* father; // set only in the case of a son + instsize_t* instsize; +} dynablock_t; + +typedef struct dynablocklist_s { + uintptr_t text; + int textsz; + int maxsz; // maxblock size (for this block or previous block) + dynablock_t** direct; // direct mapping (waste of space, so the array is created at first write) +} dynablocklist_t; + +#endif //__DYNABLOCK_PRIVATE_H_ \ No newline at end of file diff --git a/src/dynarec/dynarec.c b/src/dynarec/dynarec.c index c88cd61f..8769bc1e 100755 --- a/src/dynarec/dynarec.c +++ b/src/dynarec/dynarec.c @@ -22,10 +22,10 @@ #endif #ifdef DYNAREC -#ifdef ARM -void arm_prolog(x64emu_t* emu, void* addr) EXPORTDYN; -void arm_epilog() EXPORTDYN; -void arm_epilog_fast() EXPORTDYN; +#ifdef ARM64 +void arm64_prolog(x64emu_t* emu, void* addr) EXPORTDYN; +void arm64_epilog() EXPORTDYN; +void arm64_epilog_fast() EXPORTDYN; #endif #endif @@ -39,7 +39,7 @@ void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2) if(!addr) { x2-=8; // actual PC is 2 instructions ahead dynablock_t* db = FindDynablockFromNativeAddress(x2); - printf_log(LOG_NONE, "Warning, jumping to NULL address from %p (db=%p, x86addr=%p)\n", x2, db, db?(void*)getX86Address(db, (uintptr_t)x2):NULL); + printf_log(LOG_NONE, "Warning, jumping to NULL address from %p (db=%p, x64addr=%p)\n", x2, db, db?(void*)getX64Address(db, (uintptr_t)x2):NULL); } #endif dynablock_t* current = NULL; @@ -47,17 +47,17 @@ void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2) dynablock_t* block = DBGetBlock(emu, addr, 1, ¤t); if(!block) { // no block, let link table as is... - //tableupdate(arm_epilog, addr, table); - return arm_epilog; + //tableupdate(arm64_epilog, addr, table); + return arm64_epilog; } if(!block->done) { // not finished yet... leave linker //tableupdate(arm_linker, addr, table); - return arm_epilog; + return arm64_epilog; } if(!(jblock=block->block)) { // null block, but done: go to epilog, no linker here - return arm_epilog; + return arm64_epilog; } //dynablock_t *father = block->father?block->father:block; return jblock; @@ -82,7 +82,7 @@ void DynaCall(x64emu_t* emu, uintptr_t addr) } } #ifdef DYNAREC - if(!box86_dynarec) + if(!box64_dynarec) #endif EmuCall(emu, addr); #ifdef DYNAREC @@ -107,18 +107,18 @@ void DynaCall(x64emu_t* emu, uintptr_t addr) dynarec_log(LOG_DEBUG, "%04d|Calling Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu); Run(emu, 1); } else { - dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x86 instructions (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,block->father, emu); + dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x64 instructions (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,block->father, emu); CHECK_FLAGS(emu); // block is here, let's run it! 
- #ifdef ARM - arm_prolog(emu, block->block); + #ifdef ARM64 + arm64_prolog(emu, block->block); #endif } if(emu->fork) { int forktype = emu->fork; emu->quit = 0; emu->fork = 0; - emu = x86emu_fork(emu, forktype); + emu = x64emu_fork(emu, forktype); if(emu->type == EMUTYPE_MAIN) { ejb = GetJmpBuf(); ejb->emu = emu; @@ -170,7 +170,7 @@ int DynaRun(x64emu_t* emu) } } #ifdef DYNAREC - if(!box86_dynarec) + if(!box64_dynarec) #endif return Run(emu, 0); #ifdef DYNAREC @@ -186,17 +186,17 @@ int DynaRun(x64emu_t* emu) dynarec_log(LOG_DEBUG, "%04d|Running Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu); Run(emu, 1); } else { - dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x86 insts (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, block->father, emu); + dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x64 insts (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, block->father, emu); // block is here, let's run it! - #ifdef ARM - arm_prolog(emu, block->block); + #ifdef ARM64 + arm64_prolog(emu, block->block); #endif } if(emu->fork) { int forktype = emu->fork; emu->quit = 0; emu->fork = 0; - emu = x86emu_fork(emu, forktype); + emu = x64emu_fork(emu, forktype); if(emu->type == EMUTYPE_MAIN) { ejb = GetJmpBuf(); ejb->emu = emu; diff --git a/src/dynarec/dynarec_arm64.c b/src/dynarec/dynarec_arm64.c new file mode 100755 index 00000000..b3b0b06c --- /dev/null +++ b/src/dynarec/dynarec_arm64.c @@ -0,0 +1,449 @@ +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <errno.h> +#include <string.h> + +#include "debug.h" +#include "box64context.h" +#include "custommem.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "tools/bridge_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynablock.h" +#include "dynablock_private.h" +#include "dynarec_arm64.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "elfloader.h" + +void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name) { + uint8_t *ip = (uint8_t*)inst->addr; + if(ip[0]==0xcc && ip[1]=='S' && ip[2]=='C') { + uintptr_t a = *(uintptr_t*)(ip+3); + if(a==0) { + dynarec_log(LOG_NONE, "%s%p: Exit x64emu%s\n", (box64_dynarec_dump>1)?"\e[1m":"", (void*)ip, (box64_dynarec_dump>1)?"\e[m":""); + } else { + dynarec_log(LOG_NONE, "%s%p: Native call to %p%s\n", (box64_dynarec_dump>1)?"\e[1m":"", (void*)ip, (void*)a, (box64_dynarec_dump>1)?"\e[m":""); + } + } else { + if(dec) { + dynarec_log(LOG_NONE, "%s%p: %s", (box64_dynarec_dump>1)?"\e[1m":"", ip, DecodeX64Trace(dec, inst->addr)); + } else { + dynarec_log(LOG_NONE, "%s%p: ", (box64_dynarec_dump>1)?"\e[1m":"", ip); + for(int i=0; i<inst->size; ++i) { + dynarec_log(LOG_NONE, "%02X ", ip[i]); + } + dynarec_log(LOG_NONE, " %s", name); + } + // print Call function name if possible + if(ip[0]==0xE8 || ip[0]==0xE9) { // Call / Jmp + uintptr_t nextaddr = (uintptr_t)ip + 5 + *((int32_t*)(ip+1)); + printFunctionAddr(nextaddr, "=> "); + } else if(ip[0]==0xFF) { + if(ip[1]==0x25) { + uintptr_t nextaddr = (uintptr_t)ip + 6 + *((int32_t*)(ip+2)); + printFunctionAddr(nextaddr, "=> "); + } + } + // end of line and colors + dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); + } +} + +void add_next(dynarec_arm_t *dyn, uintptr_t addr) { + if(dyn->next_sz == dyn->next_cap) { + dyn->next_cap += 16; + dyn->next = 
(uintptr_t*)realloc(dyn->next, dyn->next_cap*sizeof(uintptr_t)); + } + for(int i=0; i<dyn->next_sz; ++i) + if(dyn->next[i]==addr) + return; + dyn->next[dyn->next_sz++] = addr; +} +uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr) { + // get closest, but no addresses befores + uintptr_t best = 0; + int i = 0; + while((i<dyn->next_sz) && (best!=addr)) { + if(dyn->next[i]<addr) { // remove the address, it's before current address + memmove(dyn->next+i, dyn->next+i+1, (dyn->next_sz-i-1)*sizeof(uintptr_t)); + --dyn->next_sz; + } else { + if((dyn->next[i]<best) || !best) + best = dyn->next[i]; + ++i; + } + } + return best; +} +#define PK(A) (*((uint8_t*)(addr+(A)))) +int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n) +{ + if(!n) + return 1; + if(PK(0)==0x90) + return is_nops(dyn, addr+1, n-1); + if(n>1 && PK(0)==0x66) // if opcode start with 0x66, and there is more after, than is *can* be a NOP + return is_nops(dyn, addr+1, n-1); + if(n>2 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x00) + return is_nops(dyn, addr+3, n-3); + if(n>2 && PK(0)==0x8d && PK(1)==0x76 && PK(2)==0x00) // lea esi, [esi] + return is_nops(dyn, addr+3, n-3); + if(n>3 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x40 && PK(3)==0x00) + return is_nops(dyn, addr+4, n-4); + if(n>3 && PK(0)==0x8d && PK(1)==0x74 && PK(2)==0x26 && PK(3)==0x00) + return is_nops(dyn, addr+4, n-4); + if(n>4 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x44 && PK(3)==0x00 && PK(4)==0x00) + return is_nops(dyn, addr+5, n-5); + if(n>5 && PK(0)==0x8d && PK(1)==0xb6 && PK(2)==0x00 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00) + return is_nops(dyn, addr+6, n-6); + if(n>6 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x80 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00) + return is_nops(dyn, addr+7, n-7); + if(n>6 && PK(0)==0x8d && PK(1)==0xb4 && PK(2)==0x26 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00) // lea esi, [esi+0] + return is_nops(dyn, addr+7, n-7); + if(n>7 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x84 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00 && PK(7)==0x00) + return is_nops(dyn, addr+8, n-8); + return 0; +} + +// return size of next instuciton, -1 is unknown +// not all instrction are setup +int next_instruction(dynarec_arm_t *dyn, uintptr_t addr) +{ + uint8_t opcode = PK(0); + uint8_t nextop; + switch (opcode) { + case 0x66: + opcode = PK(1); + switch(opcode) { + case 0x90: + return 2; + } + break; + case 0x81: + nextop = PK(1); + return fakeed(dyn, addr+2, 0, nextop)-addr + 4; + case 0x83: + nextop = PK(1); + return fakeed(dyn, addr+2, 0, nextop)-addr + 1; + case 0x84: + case 0x85: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + nextop = PK(1); + return fakeed(dyn, addr+2, 0, nextop)-addr; + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5C: + case 0x5D: + case 0x5E: + case 0x5F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + return 1; + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + return 5; + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + return 2; + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 0xBE: + case 0xBF: + return 5; + case 0xFF: + nextop = PK(1); + switch((nextop>>3)&7) { + 
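            // The reg field (bits 3..5) of the ModRM byte selects the 0xFF sub-opcode.
            // Only the forms whose length depends purely on the ModRM/SIB/displacement
            // encoding are handled (INC/DEC/CALL/JMP/PUSH Ed), using fakeed() to walk
            // the addressing bytes without emitting anything; anything else falls
            // through to the default and reports -1 (size unknown).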
case 0: // INC Ed + case 1: //DEC Ed + case 2: // CALL Ed + case 4: // JMP Ed + case 6: // Push Ed + return fakeed(dyn, addr+2, 0, nextop)-addr; + } + break; + default: + break; + } + return -1; +} +#undef PK + +int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n) +{ + int i = 0; + while(i<n) { + int j=next_instruction(dyn, addr+i); + if(j<=0) return 0; + i+=j; + } + return (i==n)?1:0; +} + +uint32_t needed_flags(dynarec_arm_t *dyn, int ninst, uint32_t setf, int recurse) +{ + if(recurse == 10) + return X_PEND; + if(ninst == dyn->size) + return X_PEND; // no more instructions, or too many jmp loop, stop + + uint32_t needed = dyn->insts[ninst].x64.use_flags; + if(needed) { + setf &= ~needed; + if(!setf) // all flags already used, no need to continue + return needed; + } + + if(!needed && !dyn->insts[ninst].x64.set_flags && !dyn->insts[ninst].x64.jmp_insts) { + int start = ninst; + int end = ninst; + while(end<dyn->size && !dyn->insts[end].x64.use_flags && !dyn->insts[end].x64.set_flags && !dyn->insts[end].x64.jmp_insts) + ++end; + needed = needed_flags(dyn, end, setf, recurse); + for(int i=start; i<end; ++i) + dyn->insts[i].x64.need_flags = needed; + return needed; + } + + if(dyn->insts[ninst].x64.set_flags && (dyn->insts[ninst].x64.state_flags!=SF_MAYSET)) { + if((setf & ~dyn->insts[ninst].x64.set_flags) == 0) + return needed; // all done, gives all the flags needed + setf |= dyn->insts[ninst].x64.set_flags; // add new flags to continue + } + + int jinst = dyn->insts[ninst].x64.jmp_insts; + if(dyn->insts[ninst].x64.jmp) { + dyn->insts[ninst].x64.need_flags = (jinst==-1)?X_PEND:needed_flags(dyn, jinst, setf, recurse+1); + if(dyn->insts[ninst].x64.use_flags) // conditionnal jump + dyn->insts[ninst].x64.need_flags |= needed_flags(dyn, ninst+1, setf, recurse); + } else + dyn->insts[ninst].x64.need_flags = needed_flags(dyn, ninst+1, setf, recurse); + if(dyn->insts[ninst].x64.state_flags==SF_MAYSET) + needed |= dyn->insts[ninst].x64.need_flags; + else + needed |= (dyn->insts[ninst].x64.need_flags & ~dyn->insts[ninst].x64.set_flags); + if(needed == (X_PEND|X_ALL)) + needed = X_ALL; + return needed; +} + +instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, int arm_size) +{ + // x64 instruction is <16 bytes + int toadd; + if(x64_size>arm_size) + toadd = 1 + x64_size/15; + else + toadd = 1 + arm_size/15; + if((*size)+toadd>(*cap)) { + *cap = (*size)+toadd; + insts = (instsize_t*)realloc(insts, (*cap)*sizeof(instsize_t)); + } + while(toadd) { + if(x64_size>15) + insts[*size].x64 = 15; + else + insts[*size].x64 = x64_size; + x64_size -= insts[*size].x64; + if(arm_size>15) + insts[*size].nat = 15; + else + insts[*size].nat = arm_size; + arm_size -= insts[*size].nat; + ++(*size); + --toadd; + } + return insts; +} + +void arm_pass0(dynarec_arm_t* dyn, uintptr_t addr); +void arm_pass1(dynarec_arm_t* dyn, uintptr_t addr); +void arm_pass2(dynarec_arm_t* dyn, uintptr_t addr); +void arm_pass3(dynarec_arm_t* dyn, uintptr_t addr); + +void* FillBlock(dynablock_t* block, uintptr_t addr) { + if(addr>=box64_nodynarec_start && addr<box64_nodynarec_end) + return NULL; + // init the helper + dynarec_arm_t helper = {0}; + helper.start = addr; + arm_pass0(&helper, addr); + if(!helper.size) { + dynarec_log(LOG_DEBUG, "Warning, null-sized dynarec block (%p)\n", (void*)addr); + block->done = 1; + free(helper.next); + return (void*)block; + } + helper.cap = helper.size+3; // needs epilog handling + helper.insts = (instruction_arm64_t*)calloc(helper.cap, sizeof(instruction_arm64_t)); + // 
pass 1, addresses, x64 jump addresses, flags + arm_pass1(&helper, addr); + // calculate barriers + uintptr_t start = helper.insts[0].x64.addr; + uintptr_t end = helper.insts[helper.size].x64.addr+helper.insts[helper.size].x64.size; + for(int i=0; i<helper.size; ++i) + if(helper.insts[i].x64.jmp) { + uintptr_t j = helper.insts[i].x64.jmp; + if(j<start || j>=end) + helper.insts[i].x64.jmp_insts = -1; + else { + // find jump address instruction + int k=-1; + for(int i2=0; i2<helper.size && k==-1; ++i2) { + if(helper.insts[i2].x64.addr==j) + k=i2; + } + if(k!=-1) // -1 if not found, mmm, probably wrong, exit anyway + helper.insts[k].x64.barrier = 1; + helper.insts[i].x64.jmp_insts = k; + } + } + for(int i=0; i<helper.size; ++i) + if(helper.insts[i].x64.set_flags && !helper.insts[i].x64.need_flags) { + helper.insts[i].x64.need_flags = needed_flags(&helper, i+1, helper.insts[i].x64.set_flags, 0); + if((helper.insts[i].x64.need_flags&X_PEND) && (helper.insts[i].x64.state_flags==SF_MAYSET)) + helper.insts[i].x64.need_flags = X_ALL; + } + + // pass 2, instruction size + arm_pass2(&helper, addr); + // ok, now allocate mapped memory, with executable flag on + int sz = helper.arm_size; + void* p = (void*)AllocDynarecMap(block, sz); + if(p==NULL) { + dynarec_log(LOG_DEBUG, "AllocDynarecMap(%p, %d) failed, cancelling block\n", block, sz); + free(helper.insts); + free(helper.next); + return NULL; + } + helper.block = p; + helper.arm_start = (uintptr_t)p; + if(helper.sons_size) { + helper.sons_x64 = (uintptr_t*)calloc(helper.sons_size, sizeof(uintptr_t)); + helper.sons_arm = (void**)calloc(helper.sons_size, sizeof(void*)); + } + // pass 3, emit (log emit arm opcode) + if(box64_dynarec_dump) { + dynarec_log(LOG_NONE, "%s%04d|Emitting %d bytes for %d x64 bytes", (box64_dynarec_dump>1)?"\e[01;36m":"", GetTID(), helper.arm_size, helper.isize); + printFunctionAddr(helper.start, " => "); + dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); + } + helper.arm_size = 0; + arm_pass3(&helper, addr); + if(sz!=helper.arm_size) { + printf_log(LOG_NONE, "BOX64: Warning, size difference in block between pass2 (%d) & pass3 (%d)!\n", sz, helper.arm_size); + uint8_t *dump = (uint8_t*)helper.start; + printf_log(LOG_NONE, "Dump of %d x64 opcodes:\n", helper.size); + for(int i=0; i<helper.size; ++i) { + printf_log(LOG_NONE, "%p:", dump); + for(; dump<(uint8_t*)helper.insts[i+1].x64.addr; ++dump) + printf_log(LOG_NONE, " %02X", *dump); + printf_log(LOG_NONE, "\t%d -> %d\n", helper.insts[i].size2, helper.insts[i].size); + } + printf_log(LOG_NONE, " ------------\n"); + } + // all done... + __clear_cache(p, p+sz); // need to clear the cache before execution... 
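    // AArch64 keeps instruction and data caches non-coherent for freshly written
    // code, so the [p, p+sz) range that pass 3 just filled must be cleaned from the
    // D-cache and invalidated in the I-cache before the block can be executed;
    // __clear_cache() performs both steps.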
+ // keep size of instructions for signal handling + { + size_t cap = 1; + for(int i=0; i<helper.size; ++i) + cap += 1 + ((helper.insts[i].x64.size>helper.insts[i].size)?helper.insts[i].x64.size:helper.insts[i].size)/15; + size_t size = 0; + block->instsize = (instsize_t*)calloc(cap, sizeof(instsize_t)); + for(int i=0; i<helper.size; ++i) + block->instsize = addInst(block->instsize, &size, &cap, helper.insts[i].x64.size, helper.insts[i].size/4); + block->instsize = addInst(block->instsize, &size, &cap, 0, 0); // add a "end of block" mark, just in case + } + // ok, free the helper now + free(helper.insts); + free(helper.next); + block->size = sz; + block->isize = helper.size; + block->block = p; + block->need_test = 0; + //block->x64_addr = (void*)start; + block->x64_size = end-start; + if(box64_dynarec_largest<block->x64_size) + box64_dynarec_largest = block->x64_size; + block->hash = X31_hash_code(block->x64_addr, block->x64_size); + // fill sons if any + dynablock_t** sons = NULL; + int sons_size = 0; + if(helper.sons_size) { + sons = (dynablock_t**)calloc(helper.sons_size, sizeof(dynablock_t*)); + for (int i=0; i<helper.sons_size; ++i) { + int created = 1; + dynablock_t *son = AddNewDynablock(block->parent, helper.sons_x64[i], &created); + if(created) { // avoid breaking a working block! + son->block = helper.sons_arm[i]; + son->x64_addr = (void*)helper.sons_x64[i]; + son->x64_size = end-helper.sons_x64[i]; + if(!son->x64_size) {printf_log(LOG_NONE, "Warning, son with null x64 size! (@%p / ARM=%p)", son->x64_addr, son->block);} + son->father = block; + son->done = 1; + sons[sons_size++] = son; + if(!son->parent) + son->parent = block->parent; + } + } + if(sons_size) { + block->sons = sons; + block->sons_size = sons_size; + } else + free(sons); + } + free(helper.sons_x64); + free(helper.sons_arm); + block->done = 1; + return (void*)block; +} \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c new file mode 100755 index 00000000..784739ac --- /dev/null +++ b/src/dynarec/dynarec_arm64_functions.c @@ -0,0 +1,354 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <errno.h> +#include <string.h> +#include <math.h> +#include <signal.h> +#include <sys/types.h> +#include <unistd.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "tools/bridge_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "emu/x87emu_private.h" +#include "x64trace.h" +#include "signals.h" +#include "dynarec_arm64.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" + +void arm_fstp(x64emu_t* emu, void* p) +{ + if(ST0.q!=STld(0).ref) + D2LD(&ST0.d, p); + else + memcpy(p, &STld(0).ld, 10); +} + +void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n) +{ + dynarec_log(LOG_DEBUG, "R%ld=0x%lx (%ld)\n", n, reg, reg); +} + +void arm_f2xm1(x64emu_t* emu) +{ + ST0.d = exp2(ST0.d) - 1.0; +} +void arm_fyl2x(x64emu_t* emu) +{ + ST(1).d = log2(ST0.d)*ST(1).d; +} +void arm_ftan(x64emu_t* emu) +{ + ST0.d = tan(ST0.d); +} +void arm_fpatan(x64emu_t* emu) +{ + ST1.d = atan2(ST1.d, ST0.d); +} +void arm_fxtract(x64emu_t* emu) +{ + int32_t tmp32s = (ST1.q&0x7ff0000000000000LL)>>52; + tmp32s -= 1023; + ST1.d /= exp2(tmp32s); + ST0.d = tmp32s; +} +void arm_fprem(x64emu_t* emu) +{ + int32_t tmp32s = ST0.d / ST1.d; + ST0.d -= ST1.d * tmp32s; + 
emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = (tmp32s&1); + emu->sw.f.F87_C3 = ((tmp32s>>1)&1); + emu->sw.f.F87_C1 = ((tmp32s>>2)&1); +} +void arm_fyl2xp1(x64emu_t* emu) +{ + ST(1).d = log2(ST0.d + 1.0)*ST(1).d; +} +void arm_fsincos(x64emu_t* emu) +{ + sincos(ST1.d, &ST1.d, &ST0.d); +} +void arm_frndint(x64emu_t* emu) +{ + ST0.d = fpu_round(emu, ST0.d); +} +void arm_fscale(x64emu_t* emu) +{ + ST0.d *= exp2(trunc(ST1.d)); +} +void arm_fsin(x64emu_t* emu) +{ + ST0.d = sin(ST0.d); +} +void arm_fcos(x64emu_t* emu) +{ + ST0.d = cos(ST0.d); +} + +void arm_fbld(x64emu_t* emu, uint8_t* ed) +{ + fpu_fbld(emu, ed); +} + +void arm_fild64(x64emu_t* emu, int64_t* ed) +{ + int64_t tmp; + memcpy(&tmp, ed, sizeof(tmp)); + ST0.d = tmp; + STll(0).ll = tmp; + STll(0).ref = ST0.q; +} + +void arm_fbstp(x64emu_t* emu, uint8_t* ed) +{ + fpu_fbst(emu, ed); +} + +void arm_fistp64(x64emu_t* emu, int64_t* ed) +{ + // used of memcpy to avoid aligments issues + if(STll(0).ref==ST(0).q) { + memcpy(ed, &STll(0).ll, sizeof(int64_t)); + } else { + int64_t tmp; + if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d)) + tmp = 0x8000000000000000LL; + else + tmp = fpu_round(emu, ST0.d); + memcpy(ed, &tmp, sizeof(tmp)); + } +} + +void arm_fistt64(x64emu_t* emu, int64_t* ed) +{ + // used of memcpy to avoid aligments issues + int64_t tmp = ST0.d; + memcpy(ed, &tmp, sizeof(tmp)); +} + +void arm_fld(x64emu_t* emu, uint8_t* ed) +{ + memcpy(&STld(0).ld, ed, 10); + LD2D(&STld(0), &ST(0).d); + STld(0).ref = ST0.q; +} + +void arm_ud(x64emu_t* emu) +{ + emit_signal(emu, SIGILL, (void*)R_RIP, 0); +} + +void arm_fsave(x64emu_t* emu, uint8_t* ed) +{ + fpu_savenv(emu, (char*)ed, 0); + + uint8_t* p = ed; + p += 28; + for (int i=0; i<8; ++i) { + LD2D(p, &ST(i).d); + p+=10; + } +} +void arm_frstor(x64emu_t* emu, uint8_t* ed) +{ + fpu_loadenv(emu, (char*)ed, 0); + + uint8_t* p = ed; + p += 28; + for (int i=0; i<8; ++i) { + D2LD(&ST(i).d, p); + p+=10; + } + +} + +void arm_fprem1(x64emu_t* emu) +{ + // simplified version + int32_t tmp32s = round(ST0.d / ST1.d); + ST0.d -= ST1.d*tmp32s; + emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = (tmp32s&1); + emu->sw.f.F87_C3 = ((tmp32s>>1)&1); + emu->sw.f.F87_C1 = ((tmp32s>>2)&1); +} + + +// Get a FPU single scratch reg +int fpu_get_scratch_single(dynarec_arm_t* dyn) +{ + return dyn->fpu_scratch++; // return an Sx +} +// Get a FPU double scratch reg +int fpu_get_scratch_double(dynarec_arm_t* dyn) +{ + int i = (dyn->fpu_scratch+1)&(~1); + dyn->fpu_scratch = i+2; + return i/2; // return a Dx +} +// Get a FPU quad scratch reg +int fpu_get_scratch_quad(dynarec_arm_t* dyn) +{ + if(dyn->fpu_scratch>4) { + if(dyn->fpu_extra_qscratch) { + dynarec_log(LOG_NONE, "Warning, Extra QScratch slot taken and need another one!\n"); + } else + dyn->fpu_extra_qscratch = fpu_get_reg_quad(dyn); + return dyn->fpu_extra_qscratch; + } + int i = (dyn->fpu_scratch+3)&(~3); + dyn->fpu_scratch = i+4; + return i/2; // Return a Dx, not a Qx +} +// Reset scratch regs counter +void fpu_reset_scratch(dynarec_arm_t* dyn) +{ + dyn->fpu_scratch = 0; + if(dyn->fpu_extra_qscratch) { + fpu_free_reg_quad(dyn, dyn->fpu_extra_qscratch); + dyn->fpu_extra_qscratch = 0; + } +} +#define FPUFIRST 8 +// Get a FPU double reg +int fpu_get_reg_double(dynarec_arm_t* dyn) +{ + // TODO: check upper limit? 
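    // The fpuused[] map covers the 24 NEON doubles D8..D31 (FPUFIRST==8) that hold
    // the x87/MMX/SSE caches: a double reg takes one slot, a quad reg an even/odd
    // pair, and the allocators below return the slot index offset by FPUFIRST as a
    // Dx register number. Scratch registers are counted separately through
    // fpu_scratch and the fpu_get_scratch_*() helpers.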
+ int i=0; + while (dyn->fpuused[i]) ++i; + dyn->fpuused[i] = 1; + return i+FPUFIRST; // return a Dx +} +// Free a FPU double reg +void fpu_free_reg_double(dynarec_arm_t* dyn, int reg) +{ + // TODO: check upper limit? + int i=reg-FPUFIRST; + dyn->fpuused[i] = 0; +} +// Get a FPU quad reg +int fpu_get_reg_quad(dynarec_arm_t* dyn) +{ + int i=0; + while (dyn->fpuused[i] || dyn->fpuused[i+1]) i+=2; + dyn->fpuused[i] = dyn->fpuused[i+1] = 1; + return i+FPUFIRST; // Return a Dx, not a Qx +} +// Free a FPU quad reg +void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg) +{ + int i=reg-FPUFIRST; + dyn->fpuused[i] = dyn->fpuused[i+1] = 0; +} +// Reset fpu regs counter +void fpu_reset_reg(dynarec_arm_t* dyn) +{ + dyn->fpu_reg = 0; + for (int i=0; i<24; ++i) + dyn->fpuused[i]=0; +} + +#define F8 *(uint8_t*)(addr++) +#define F32 *(uint32_t*)(addr+=4, addr-4) +// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD +int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity) +{ + + uint32_t tested = (1<<parity)-1; + if((nextop&0xC0)==0xC0) + return 0; // direct register, no parity... + if(!(nextop&0xC0)) { + if((nextop&7)==4) { + uint8_t sib = F8; + int sib_reg = (sib>>3)&7; + if((sib&0x7)==5) { + uint32_t tmp = F32; + if (sib_reg!=4) { + // if XXXXXX+reg<<N then check parity of XXXXX and N should be enough + return ((tmp&tested)==0 && (sib>>6)>=parity)?1:0; + } else { + // just a constant... + return (tmp&tested)?0:1; + } + } else { + if(sib_reg==4 && parity<3) + return 0; // simple [reg] + // don't try [reg1 + reg2<<N], unless reg1 is ESP + return ((sib&0x7)==4 && (sib>>6)>=parity)?1:0; + } + } else if((nextop&7)==5) { + uint32_t tmp = F32; + return (tmp&tested)?0:1; + } else { + return 0; + } + } else { + return 0; //Form [reg1 + reg2<<N + XXXXXX] + } +} + +// Do the GETED, but don't emit anything... +uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop) +{ + if((nextop&0xC0)==0xC0) + return addr; + if(!(nextop&0xC0)) { + if((nextop&7)==4) { + uint8_t sib = F8; + if((sib&0x7)==5) { + addr+=4; + } + } else if((nextop&7)==5) { + addr+=4; + } + } else { + if((nextop&7)==4) { + ++addr; + } + if(nextop&0x80) { + addr+=4; + } else { + ++addr; + } + } + return addr; +} +#undef F8 +#undef F32 + +int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn) +{ +#define PK(a) *(uint8_t*)(addr+a) +#define PK64(a) *(uint64_t*)(addr+a) + + if(!addr) + return 0; + if(PK(0)==0xff && PK(1)==0x25) { // absolute jump, maybe the GOT + uintptr_t a1 = (PK64(2)); // need to add a check to see if the address is from the GOT ! + addr = *(uintptr_t*)a1; + } + if(addr<0x10000) // too low, that is suspicious + return 0; + onebridge_t *b = (onebridge_t*)(addr); + if(b->CC==0xCC && b->S=='S' && b->C=='C' && b->w!=(wrapper_t)0 && b->f!=(uintptr_t)PltResolver) { + // found ! 
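        // The onebridge_t stub starts with 0xCC 'S' 'C' (the same marker decoded by
        // printf_x64_instruction above), followed by the wrapper and the wrapped
        // function; the PltResolver test skips not-yet-resolved PLT slots.
        // calladdress gets addr+1 (just past the 0xCC byte) and retn the RET imm16
        // count when the stub ends with a C2 return, 0 otherwise.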
+ if(retn) *retn = (b->C3==0xC2)?b->N:0; + if(calladdress) *calladdress = addr+1; + return 1; + } + return 0; +#undef PK32 +#undef PK +} + diff --git a/src/dynarec/dynarec_arm64_functions.h b/src/dynarec/dynarec_arm64_functions.h new file mode 100755 index 00000000..d932aa4d --- /dev/null +++ b/src/dynarec/dynarec_arm64_functions.h @@ -0,0 +1,61 @@ +#ifndef __DYNAREC_ARM_FUNCTIONS_H__ +#define __DYNAREC_ARM_FUNCTIONS_H__ + +typedef struct x64emu_s x64emu_t; + +void arm_fstp(x64emu_t* emu, void* p); + +void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n); + +void arm_f2xm1(x64emu_t* emu); +void arm_fyl2x(x64emu_t* emu); +void arm_ftan(x64emu_t* emu); +void arm_fpatan(x64emu_t* emu); +void arm_fxtract(x64emu_t* emu); +void arm_fprem(x64emu_t* emu); +void arm_fyl2xp1(x64emu_t* emu); +void arm_fsincos(x64emu_t* emu); +void arm_frndint(x64emu_t* emu); +void arm_fscale(x64emu_t* emu); +void arm_fsin(x64emu_t* emu); +void arm_fcos(x64emu_t* emu); +void arm_fbld(x64emu_t* emu, uint8_t* ed); +void arm_fild64(x64emu_t* emu, int64_t* ed); +void arm_fbstp(x64emu_t* emu, uint8_t* ed); +void arm_fistp64(x64emu_t* emu, int64_t* ed); +void arm_fistt64(x64emu_t* emu, int64_t* ed); +void arm_fld(x64emu_t* emu, uint8_t* ed); +void arm_fsave(x64emu_t* emu, uint8_t* ed); +void arm_frstor(x64emu_t* emu, uint8_t* ed); +void arm_fprem1(x64emu_t* emu); + +void arm_ud(x64emu_t* emu); + +// Get an FPU single scratch reg +int fpu_get_scratch_single(dynarec_arm_t* dyn); +// Get an FPU double scratch reg +int fpu_get_scratch_double(dynarec_arm_t* dyn); +// Get an FPU quad scratch reg +int fpu_get_scratch_quad(dynarec_arm_t* dyn); +// Reset scratch regs counter +void fpu_reset_scratch(dynarec_arm_t* dyn); +// Get an FPU double reg +int fpu_get_reg_double(dynarec_arm_t* dyn); +// Free a FPU double reg +void fpu_free_reg_double(dynarec_arm_t* dyn, int reg); +// Get an FPU quad reg +int fpu_get_reg_quad(dynarec_arm_t* dyn); +// Free a FPU quad reg +void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg); +// Reset fpu regs counter +void fpu_reset_reg(dynarec_arm_t* dyn); + +// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD +int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity); +// Do the GETED, but don't emit anything... +uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop); + +// Is what pointed at addr a native call? And if yes, to what function? 
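// If found, *calladdress receives the address just past the 0xCC marker and *retn
// the RET imm16 byte count of the bridge (0 when it returns with a plain RET).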
+int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn); + +#endif //__DYNAREC_ARM_FUNCTIONS_H__ \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_private.h b/src/dynarec/dynarec_arm64_private.h new file mode 100755 index 00000000..8101cc2b --- /dev/null +++ b/src/dynarec/dynarec_arm64_private.h @@ -0,0 +1,56 @@ +#ifndef __DYNAREC_ARM_PRIVATE_H_ +#define __DYNAREC_ARM_PRIVATE_H_ + +#include "dynarec_private.h" + +typedef struct x64emu_s x64emu_t; + +typedef struct instruction_arm_s { + instruction_x64_t x64; + uintptr_t address; // (start) address of the arm emited instruction + uintptr_t epilog; // epilog of current instruction (can be start of next, of barrier stuff) + int size; // size of the arm emited instruction + int size2; // size of the arm emited instrucion after pass2 + uintptr_t mark, mark2, mark3; + uintptr_t markf; + uintptr_t markseg; + uintptr_t marklock; + int pass2choice;// value for choices that are fixed on pass2 for pass3 + uintptr_t natcall; + int retn; +} instruction_arm64_t; + +typedef struct dynarec_arm_s { + instruction_arm64_t *insts; + int32_t size; + int32_t cap; + uintptr_t start; // start of the block + uint32_t isize; // size in byte of x64 instructions included + void* block; // memory pointer where next instruction is emited + uintptr_t arm_start; // start of the arm code + int arm_size; // size of emitted arm code + int state_flags;// actual state for on-demand flags + int x87cache[8];// cache status for the 8 x87 register behind the fpu stack + int x87reg[8]; // reg used for x87cache entry + int mmxcache[8];// cache status for the 8 MMX registers + int ssecache[8];// cache status for the 8 SSE(2) registers + int fpuused[24];// all 8..31 double reg from fpu, used by x87, sse and mmx + int x87stack; // cache stack counter + int fpu_scratch;// scratch counter + int fpu_extra_qscratch; // some opcode need an extra quad scratch register + int fpu_reg; // x87/sse/mmx reg counter + int dfnone; // if defered flags is already set to df_none + uintptr_t* next; // variable array of "next" jump address + int next_sz; + int next_cap; + uintptr_t* sons_x64; // the x64 address of potential dynablock sons + void** sons_arm; // the arm address of potential dynablock sons + int sons_size; // number of potential dynablock sons +} dynarec_arm_t; + +void add_next(dynarec_arm_t *dyn, uintptr_t addr); +uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr); +int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n); +int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n); + +#endif //__DYNAREC_ARM_PRIVATE_H_ \ No newline at end of file diff --git a/src/dynarec/dynarec_private.h b/src/dynarec/dynarec_private.h new file mode 100755 index 00000000..66eb3bfa --- /dev/null +++ b/src/dynarec/dynarec_private.h @@ -0,0 +1,35 @@ +#ifndef __DYNAREC_PRIVATE_H_ +#define __DYNAREC_PRIVATE_H_ + +// all flags for the use_flags field +#define X_CF (1<<0) +#define X_PF (1<<1) +#define X_AF (1<<2) +#define X_ZF (1<<3) +#define X_SF (1<<4) +#define X_OF (1<<5) +#define X_ALL ((1<<6)-1) +#define X_PEND (0x1000) + +// all state flags +#define SF_UNKNOWN 0 +#define SF_SET 1 +#define SF_PENDING 2 +#define SF_MAYSET 3 +#define SF_SUBSET 4 + +typedef struct instruction_x64_s { + uintptr_t addr; //address of the instruction + int32_t size; // size of the instruction + int barrier; // next instruction is a jump point, so no optim allowed + uintptr_t jmp; // offset to jump to, even if conditionnal (0 if not), no relative offset here + int 
jmp_insts; // instuction to jump to (-1 if out of the block) + uint32_t use_flags; // 0 or combination of X_?F + uint32_t set_flags; // 0 or combination of X_?F + uint32_t need_flags; // calculated + int state_flags; // One of SF_XXX state +} instruction_x64_t; + +void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name); + +#endif //__DYNAREC_PRIVATE_H_ diff --git a/src/elfs/elfloader.c b/src/elfs/elfloader.c index a9335de3..f821fd4b 100755 --- a/src/elfs/elfloader.c +++ b/src/elfs/elfloader.c @@ -25,7 +25,6 @@ #include "x64emu.h" #include "box64stack.h" #include "callback.h" -//#include "dynarec.h" #include "box64stack.h" #include "custommem.h" #include "wine_tools.h" @@ -293,7 +292,7 @@ int LoadElfMemory(FILE* f, box64context_t* context, elfheader_t* head) } } #ifdef DYNAREC - if(e->p_flags & PF_X) { + if(box64_dynarec && (e->p_flags & PF_X)) { dynarec_log(LOG_DEBUG, "Add ELF eXecutable Memory %p:%p\n", dest, (void*)e->p_memsz); addDBFromAddressRange((uintptr_t)dest, e->p_memsz); } @@ -334,7 +333,8 @@ int ReloadElfMemory(FILE* f, box64context_t* context, elfheader_t* head) if(e->p_filesz) { ssize_t r = -1; #ifdef DYNAREC - unprotectDB((uintptr_t)dest, e->p_memsz); + if(box64_dynarec) + unprotectDB((uintptr_t)dest, e->p_memsz); #endif if((r=fread(dest, e->p_filesz, 1, f))!=1) { printf_log(LOG_NONE, "Fail to (re)read PT_LOAD part #%d (dest=%p, size=%ld, return=%ld, feof=%d/ferror=%d/%s)\n", i, dest, e->p_filesz, r, feof(f), ferror(f), strerror(ferror(f))); @@ -577,7 +577,7 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, elfheader_t* head, int c if(offs) { // add r_addend to p? printf_log(LOG_DUMP, "Apply R_X86_64_COPY @%p with sym=%s, @%p size=%ld\n", p, symname, (void*)offs, sym->st_size); - memcpy(p, (void*)(offs+rela[i].r_addend), sym->st_size); + memmove(p, (void*)(offs+rela[i].r_addend), sym->st_size); } else { printf_log(LOG_NONE, "Error: Symbol %s not found, cannot apply RELA R_X86_64_COPY @%p (%p) in %s\n", symname, p, *(void**)p, head->name); } diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h index b4415673..05570965 100755 --- a/src/emu/x64emu_private.h +++ b/src/emu/x64emu_private.h @@ -10,11 +10,6 @@ typedef struct x64_ucontext_s x64_ucontext_t; #define ERR_DIVBY0 2 #define ERR_ILLEGAL 4 -#ifdef DYNAREC -#define CSTACK 32 -#define CSTACKMASK 31 -#endif - typedef struct forkpty_s { void* amaster; void* name; diff --git a/src/emu/x64run.c b/src/emu/x64run.c index 0a393466..5608cc00 100755 --- a/src/emu/x64run.c +++ b/src/emu/x64run.c @@ -21,7 +21,7 @@ #include "bridge.h" #include "signals.h" #ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" +#include "../dynarec/arm64_lock_helper.h" #endif #include "modrm.h" @@ -381,15 +381,16 @@ x64emurun: case 0x86: /* XCHG Eb,Gb */ nextop = F8; #ifdef DYNAREC - GET_EB; + GETEB(0); + GETGB; if((nextop&0xC0)==0xC0) { // reg / reg: no lock tmp8u = GB; GB = EB->byte[0]; EB->byte[0] = tmp8u; } else { do { - tmp8u = arm_lock_read_b(EB); - } while(arm_lock_write_b(EB, GB)); + tmp8u = arm64_lock_read_b(EB); + } while(arm64_lock_write_b(EB, GB)); GB = tmp8u; } // dynarec use need it's own mecanism @@ -408,21 +409,26 @@ x64emurun: case 0x87: /* XCHG Ed,Gd */ nextop = F8; #ifdef DYNAREC - GET_ED; + GETED(0); + GETGD; if((nextop&0xC0)==0xC0) { - tmp32u = GD.dword[0]; - GD.dword[0] = ED->dword[0]; - ED->dword[0] = tmp32u; + if(rex.w) { + tmp64u = GD->q[0]; + GD->q[0] = ED->q[0]; + ED->q[0] = tmp64u; + } else { + tmp32u = GD->dword[0]; + GD->q[0] = ED->dword[0]; + ED->q[0] = tmp32u; + } } else 
{ - if(((uintptr_t)ED)&3) - { - // not aligned, dont't try to "LOCK" - tmp32u = ED->dword[0]; - ED->dword[0] = GD.dword[0]; - GD.dword[0] = tmp32u; + if(rex.w) { + GD->q[0] = arm64_lock_xchg(ED, GD->q[0]); } else { - // XCHG is supposed to automaticaly LOCK memory bus - GD.dword[0] = arm_lock_xchg(ED, GD.dword[0]); + do { + tmp32u = arm64_lock_read_d(ED); + } while(arm64_lock_write_d(ED, GD->dword[0])); + GD->q[0] = tmp32u; } } #else diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c index 8ad1584f..ec0048f4 100644 --- a/src/emu/x64run0f.c +++ b/src/emu/x64run0f.c @@ -22,7 +22,7 @@ #include "bridge.h" #include "signals.h" #ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" +#include "../dynarec/arm64_lock_helper.h" #endif #include "modrm.h" diff --git a/src/emu/x64run64.c b/src/emu/x64run64.c index 20c087ac..efc90e75 100644 --- a/src/emu/x64run64.c +++ b/src/emu/x64run64.c @@ -19,10 +19,6 @@ #include "x87emu_private.h" #include "box64context.h" #include "bridge.h" -//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64run66.c b/src/emu/x64run66.c index 0eefff20..377ece8c 100644 --- a/src/emu/x64run66.c +++ b/src/emu/x64run66.c @@ -19,9 +19,8 @@ #include "x87emu_private.h" #include "box64context.h" #include "bridge.h" -//#include "signals.h" #ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" +#include "../dynarec/arm64_lock_helper.h" #endif #include "modrm.h" diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c index db4cbe25..abcf0253 100644 --- a/src/emu/x64run660f.c +++ b/src/emu/x64run660f.c @@ -19,10 +19,6 @@ #include "x87emu_private.h" #include "box64context.h" #include "bridge.h" -//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64run6664.c b/src/emu/x64run6664.c index f3001290..c61938be 100644 --- a/src/emu/x64run6664.c +++ b/src/emu/x64run6664.c @@ -19,10 +19,6 @@ #include "x87emu_private.h" #include "box64context.h" #include "bridge.h" -//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64run66d9.c b/src/emu/x64run66d9.c index fae424ba..e355ca46 100644 --- a/src/emu/x64run66d9.c +++ b/src/emu/x64run66d9.c @@ -18,12 +18,7 @@ #include "x64trace.h" #include "x87emu_private.h" #include "box64context.h" -//#include "my_cpuid.h" #include "bridge.h" -//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64run66dd.c b/src/emu/x64run66dd.c index 1ef8e93c..8b2b7326 100644 --- a/src/emu/x64run66dd.c +++ b/src/emu/x64run66dd.c @@ -18,12 +18,7 @@ #include "x64trace.h" #include "x87emu_private.h" #include "box64context.h" -//#include "my_cpuid.h" #include "bridge.h" -//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64run67.c b/src/emu/x64run67.c index 1d443611..a956072a 100644 --- a/src/emu/x64run67.c +++ b/src/emu/x64run67.c @@ -19,10 +19,6 @@ #include "x87emu_private.h" #include "box64context.h" #include "bridge.h" -//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64rund8.c b/src/emu/x64rund8.c index e0f1017e..60b9e151 100644 --- a/src/emu/x64rund8.c +++ b/src/emu/x64rund8.c @@ -19,9 +19,6 @@ #include "x87emu_private.h" #include "box64context.h" #include "bridge.h" -#ifdef DYNAREC -#include 
"../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64rund9.c b/src/emu/x64rund9.c index 253204e8..b7f2df5f 100644 --- a/src/emu/x64rund9.c +++ b/src/emu/x64rund9.c @@ -18,12 +18,7 @@ #include "x64trace.h" #include "x87emu_private.h" #include "box64context.h" -//#include "my_cpuid.h" #include "bridge.h" -//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64runda.c b/src/emu/x64runda.c index 65115b0c..70a60222 100644 --- a/src/emu/x64runda.c +++ b/src/emu/x64runda.c @@ -19,9 +19,6 @@ #include "x87emu_private.h" #include "box64context.h" #include "bridge.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64rundb.c b/src/emu/x64rundb.c index 772748a6..0cd9c5dd 100644 --- a/src/emu/x64rundb.c +++ b/src/emu/x64rundb.c @@ -18,12 +18,7 @@ #include "x64trace.h" #include "x87emu_private.h" #include "box64context.h" -//#include "my_cpuid.h" #include "bridge.h" -//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64rundd.c b/src/emu/x64rundd.c index 73678e2b..ce6081cc 100644 --- a/src/emu/x64rundd.c +++ b/src/emu/x64rundd.c @@ -19,9 +19,6 @@ #include "x87emu_private.h" #include "box64context.h" #include "bridge.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64rundf.c b/src/emu/x64rundf.c index 40186287..613c90f8 100644 --- a/src/emu/x64rundf.c +++ b/src/emu/x64rundf.c @@ -18,12 +18,7 @@ #include "x64trace.h" #include "x87emu_private.h" #include "box64context.h" -//#include "my_cpuid.h" #include "bridge.h" -//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c index 3b027137..faf1a109 100644 --- a/src/emu/x64runf0.c +++ b/src/emu/x64runf0.c @@ -20,9 +20,8 @@ #include "box64context.h" #include "my_cpuid.h" #include "bridge.h" -//#include "signals.h" #ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" +#include "../dynarec/arm64_lock_helper.h" #endif #include "modrm.h" @@ -31,9 +30,11 @@ int RunF0(x64emu_t *emu, rex_t rex) { uint8_t opcode; uint8_t nextop; - uint32_t tmp32u; + uint8_t tmp8u; + int32_t tmp32s; + uint32_t tmp32u, tmp32u2; int64_t tmp64s; - uint64_t tmp64u; + uint64_t tmp64u, tmp64u2; reg64_t *oped, *opgd; opcode = F8; @@ -46,42 +47,57 @@ int RunF0(x64emu_t *emu, rex_t rex) switch(opcode) { #ifdef DYNAREC - #define GO(B, OP) \ - case B+0: \ - nextop = F8; \ - GETEB(0); \ + #define GO(B, OP) \ + case B+0: \ + nextop = F8; \ + GETEB(0); \ GETGB; \ - do { \ - tmp8u = arm_lock_read_b(EB); \ - tmp8u = OP##8(emu, tmp8u, GB); \ - } while (arm_lock_write_b(EB, tmp8u)); \ - break; \ - case B+1: \ - nextop = F8; \ - GETED(0); \ + do { \ + tmp8u = arm64_lock_read_b(EB); \ + tmp8u = OP##8(emu, tmp8u, GB); \ + } while (arm64_lock_write_b(EB, tmp8u)); \ + break; \ + case B+1: \ + nextop = F8; \ + GETED(0); \ GETGD; \ - do { \ - tmp32u = arm_lock_read_d(ED); \ - tmp32u = OP##32(emu, tmp32u, GD.dword[0]); \ - } while (arm_lock_write_d(ED, tmp32u)); \ - break; \ - case B+2: \ - nextop = F8; \ - GETEB(0); \ + if(rex.w) { \ + do { \ + tmp64u = arm64_lock_read_dd(ED); \ + tmp64u = OP##64(emu, tmp64u, GD->q[0]); \ + } while (arm64_lock_write_dd(ED, tmp64u)); \ + } else { \ + do { \ + tmp32u = arm64_lock_read_d(ED); \ + tmp32u = OP##32(emu, tmp32u, GD->dword[0]); \ + } while (arm64_lock_write_d(ED, 
tmp32u)); \ + if(MODREG) \ + ED->dword[1] = 0; \ + } \ + break; \ + case B+2: \ + nextop = F8; \ + GETEB(0); \ GETGB; \ - GB = OP##8(emu, GB, EB->byte[0]); \ - break; \ - case B+3: \ - nextop = F8; \ - GETED(0); \ + GB = OP##8(emu, GB, EB->byte[0]); \ + break; \ + case B+3: \ + nextop = F8; \ + GETED(0); \ GETGD; \ - GD.dword[0] = OP##32(emu, GD.dword[0], ED->dword[0]); \ - break; \ - case B+4: \ - R_AL = OP##8(emu, R_AL, F8); \ - break; \ - case B+5: \ - R_EAX = OP##32(emu, R_EAX, F32); \ + if(rex.w) \ + GD->q[0] = OP##64(emu, GD->q[0], ED->q[0]); \ + else \ + GD->q[0] = OP##32(emu, GD->dword[0], ED->dword[0]); \ + break; \ + case B+4: \ + R_AL = OP##8(emu, R_AL, F8); \ + break; \ + case B+5: \ + if(rex.w) \ + R_RAX = OP##64(emu, R_RAX, F32S64); \ + else \ + R_RAX = OP##32(emu, R_EAX, F32); \ break; #else #define GO(B, OP) \ @@ -157,32 +173,28 @@ int RunF0(x64emu_t *emu, rex_t rex) GETED(0); GETGD; #ifdef DYNAREC - if(((uintptr_t)ED)&3) { + if(rex.w) do { - tmp32u = ED->dword[0] & ~0xff; - tmp32u |= arm_lock_read_b(ED); - cmp32(emu, R_EAX, tmp32u); + tmp64u = arm64_lock_read_dd(ED); + cmp64(emu, R_RAX, tmp64u); if(ACCESS_FLAG(F_ZF)) { - tmp32s = arm_lock_write_b(ED, GD->dword[0] & 0xff); - if(!tmp32s) - ED->dword[0] = GD.dword[0]; + tmp32s = arm64_lock_write_dd(ED, GD->q[0]); } else { - R_EAX = tmp32u; + R_RAX = tmp64u; tmp32s = 0; } } while(tmp32s); - } else { + else do { - tmp32u = arm_lock_read_d(ED); + tmp32u = arm64_lock_read_d(ED); cmp32(emu, R_EAX, tmp32u); if(ACCESS_FLAG(F_ZF)) { - tmp32s = arm_lock_write_d(ED, GD.dword[0]); + tmp32s = arm64_lock_write_d(ED, GD->dword[0]); } else { - R_EAX = tmp32u; + R_RAX = tmp32u; tmp32s = 0; } } while(tmp32s); - } #else pthread_mutex_lock(&emu->context->mutex_lock); if(rex.w) { @@ -209,20 +221,21 @@ int RunF0(x64emu_t *emu, rex_t rex) GETED(0); GETGD; #ifdef DYNAREC - if(((uintptr_t)ED)&3) { + if(rex.w) { do { - tmp32u = ED->dword[0] & ~0xff; - tmp32u |= arm_lock_read_b(ED); - tmp32u2 = add32(emu, tmp32u, GD.dword[0]); - } while(arm_lock_write_b(ED, tmp32u2&0xff)); - ED->dword[0] = tmp32u2; + tmp64u = arm64_lock_read_dd(ED); + tmp64u2 = add64(emu, tmp64u, GD->dword[0]); + } while(arm64_lock_write_dd(ED, tmp64u2)); + GD->q[0] = tmp64u; } else { do { - tmp32u = arm_lock_read_d(ED); - tmp32u2 = add32(emu, tmp32u, GD.dword[0]); - } while(arm_lock_write_d(ED, tmp32u2)); + tmp32u = arm64_lock_read_d(ED); + tmp32u2 = add32(emu, tmp32u, GD->dword[0]); + } while(arm64_lock_write_d(ED, tmp32u2)); + GD->q[0] = tmp32u; + if(MODREG) + ED->dword[1] = 0; } - GD.dword[0] = tmp32u; #else pthread_mutex_lock(&emu->context->mutex_lock); if(rex.w) { @@ -232,7 +245,7 @@ int RunF0(x64emu_t *emu, rex_t rex) } else { tmp32u = add32(emu, ED->dword[0], GD->dword[0]); GD->q[0] = ED->dword[0]; - if((nextop&0xC0)==0xC0) + if(MODREG) ED->q[0] = tmp32u; else ED->dword[0] = tmp32u; @@ -256,15 +269,40 @@ int RunF0(x64emu_t *emu, rex_t rex) } else tmp64u = F32S64; #ifdef DYNAREC - switch((nextop>>3)&7) { - case 0: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, add32(emu, tmp32u2, tmp32u))); break; - case 1: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, or32(emu, tmp32u2, tmp32u))); break; - case 2: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, adc32(emu, tmp32u2, tmp32u))); break; - case 3: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, sbb32(emu, tmp32u2, tmp32u))); break; - case 4: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, and32(emu, tmp32u2, tmp32u))); break; - case 5: do { tmp32u2 = 
arm_lock_read_d(ED);} while(arm_lock_write_d(ED, sub32(emu, tmp32u2, tmp32u))); break; - case 6: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, xor32(emu, tmp32u2, tmp32u))); break; - case 7: cmp32(emu, ED->dword[0], tmp32u); break; + if(rex.w) { + switch((nextop>>3)&7) { + case 0: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = add64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break; + case 1: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = or64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break; + case 2: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = adc64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break; + case 3: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = sbb64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break; + case 4: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = and64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break; + case 5: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = sub64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break; + case 6: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = xor64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break; + case 7: cmp64(emu, ED->q[0], tmp64u); break; + } + } else { + if(MODREG) + switch((nextop>>3)&7) { + case 0: ED->q[0] = add32(emu, ED->dword[0], tmp64u); break; + case 1: ED->q[0] = or32(emu, ED->dword[0], tmp64u); break; + case 2: ED->q[0] = adc32(emu, ED->dword[0], tmp64u); break; + case 3: ED->q[0] = sbb32(emu, ED->dword[0], tmp64u); break; + case 4: ED->q[0] = and32(emu, ED->dword[0], tmp64u); break; + case 5: ED->q[0] = sub32(emu, ED->dword[0], tmp64u); break; + case 6: ED->q[0] = xor32(emu, ED->dword[0], tmp64u); break; + case 7: cmp32(emu, ED->dword[0], tmp64u); break; + } + else + switch((nextop>>3)&7) { + case 0: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = add32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break; + case 1: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = or32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break; + case 2: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = adc32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break; + case 3: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = sbb32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break; + case 4: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = and32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break; + case 5: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = sub32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break; + case 6: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = xor32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break; + case 7: cmp32(emu, ED->dword[0], tmp32u); break; + } } #else pthread_mutex_lock(&emu->context->mutex_lock); diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c index 7b8238a8..18ec9645 100644 --- a/src/emu/x64runf20f.c +++ b/src/emu/x64runf20f.c @@ -19,10 +19,6 @@ #include "x87emu_private.h" #include "box64context.h" #include "bridge.h" -//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/emu/x64runf30f.c b/src/emu/x64runf30f.c index 9b25e73a..f18aa951 100644 --- a/src/emu/x64runf30f.c +++ b/src/emu/x64runf30f.c @@ -18,12 +18,7 @@ #include "x64trace.h" #include "x87emu_private.h" #include "box64context.h" -//#include "my_cpuid.h" #include "bridge.h" 
-//#include "signals.h" -#ifdef DYNAREC -#include "../dynarec/arm_lock_helper.h" -#endif #include "modrm.h" diff --git a/src/include/box64context.h b/src/include/box64context.h index 6e4d9052..c52ac53b 100755 --- a/src/include/box64context.h +++ b/src/include/box64context.h @@ -27,6 +27,8 @@ typedef struct dynablocklist_s dynablocklist_t; typedef struct mmaplist_s mmaplist_t; typedef struct kh_dynablocks_s kh_dynablocks_t; #endif +#define DYNAMAP_SHIFT 16 +#define JMPTABL_SHIFT 16 typedef void* (*procaddess_t)(const char* name); typedef void* (*vkprocaddess_t)(void* instance, const char* name); diff --git a/src/include/custommem.h b/src/include/custommem.h index a018fd37..d5e8a09a 100644 --- a/src/include/custommem.h +++ b/src/include/custommem.h @@ -27,10 +27,10 @@ void addDBFromAddressRange(uintptr_t addr, uintptr_t size); void cleanDBFromAddressRange(uintptr_t addr, uintptr_t size, int destroy); dynablocklist_t* getDB(uintptr_t idx); -void addJumpTableIfDefault(void* addr, void* jmp); -void setJumpTableDefault(void* addr); -uintptr_t getJumpTable(); -uintptr_t getJumpTableAddress(uintptr_t addr); +void addJumpTableIfDefault64(void* addr, void* jmp); +void setJumpTableDefault64(void* addr); +uintptr_t getJumpTable64(); +uintptr_t getJumpTableAddress64(uintptr_t addr); #endif #define PROT_DYNAREC 0x80 diff --git a/src/include/debug.h b/src/include/debug.h index 873db669..61071b48 100755 --- a/src/include/debug.h +++ b/src/include/debug.h @@ -7,6 +7,13 @@ extern int box64_log; // log level extern int box64_dynarec_log; extern int box64_dynarec; extern int box64_pagesize; +#ifdef DYNAREC +extern int box64_dynarec_dump; +extern int box64_dynarec_trace; +extern int box64_dynarec_forced; +extern int box64_dynarec_largest; +extern uintptr_t box64_nodynarec_start, box64_nodynarec_end; +#endif extern int dlsym_error; // log dlsym error extern int trace_xmm; // include XMM reg in trace? extern int trace_emm; // include EMM reg in trace? diff --git a/src/include/dynablock.h b/src/include/dynablock.h new file mode 100755 index 00000000..76f1e5e0 --- /dev/null +++ b/src/include/dynablock.h @@ -0,0 +1,31 @@ +#ifndef __DYNABLOCK_H_ +#define __DYNABLOCK_H_ + +typedef struct x64emu_s x64emu_t; +typedef struct dynablock_s dynablock_t; +typedef struct dynablocklist_s dynablocklist_t; +typedef struct kh_dynablocks_s kh_dynablocks_t; + +uint32_t X31_hash_code(void* addr, int len); +dynablocklist_t* NewDynablockList(uintptr_t text, int textsz, int direct); +void FreeDynablockList(dynablocklist_t** dynablocks); +void FreeDynablock(dynablock_t* db); +void MarkDynablock(dynablock_t* db); +void FreeRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size); +void MarkRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size); + +dynablock_t* FindDynablockFromNativeAddress(void* addr); // defined in box64context.h +dynablock_t* FindDynablockDynablocklist(void* addr, kh_dynablocks_t* dynablocks); + +uintptr_t StartDynablockList(dynablocklist_t* db); +uintptr_t EndDynablockList(dynablocklist_t* db); +void MarkDirectDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size); + +// Handling of Dynarec block (i.e. an exectable chunk of x64 translated code) +dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, dynablock_t** current); // return NULL if block is not found / cannot be created. 
Don't create if create==0 +dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr); + +// Create and Add an new dynablock in the list, handling direct/map +dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* created); + +#endif //__DYNABLOCK_H_ \ No newline at end of file diff --git a/src/include/dynarec_arm64.h b/src/include/dynarec_arm64.h new file mode 100755 index 00000000..1225153d --- /dev/null +++ b/src/include/dynarec_arm64.h @@ -0,0 +1,9 @@ +#ifndef __DYNAREC_ARM_H_ +#define __DYNAREC_ARM_H_ + +typedef struct dynablock_s dynablock_t; +typedef struct x64emu_s x64emu_t; + +void* FillBlock64(dynablock_t* block, uintptr_t addr); + +#endif //__DYNAREC_ARM_H_ \ No newline at end of file diff --git a/src/libtools/signals.c b/src/libtools/signals.c index 4ceac2d7..e3d3002a 100755 --- a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -446,12 +446,12 @@ void my_sigactionhandler_oldcode(int32_t sig, siginfo_t* info, void * ucntx, int // get that actual ESP first! x64emu_t *emu = thread_get_emu(); uintptr_t *frame = (uintptr_t*)R_RSP; -#if defined(DYNAREC) && defined(__arm__) +#if defined(DYNAREC) && defined(__aarch64__) ucontext_t *p = (ucontext_t *)ucntx; - void * pc = (void*)p->uc_mcontext.arm_pc; + void * pc = (void*)p->uc_mcontext.pc; dynablock_t* db = (dynablock_t*)cur_db;//FindDynablockFromNativeAddress(pc); if(db) { - frame = (uint32_t*)p->uc_mcontext.arm_r8; + frame = (uintptr_t*)p->uc_mcontext.regs[10+_SP]; } #endif // stack tracking @@ -495,17 +495,25 @@ void my_sigactionhandler_oldcode(int32_t sig, siginfo_t* info, void * ucntx, int sigcontext->uc_mcontext.gregs[X64_EFL] = emu->eflags.x32; // get segments sigcontext->uc_mcontext.gregs[X64_CSGSFS] = ((uint64_t)(R_CS)) | (((uint64_t)(R_GS))<<16) | (((uint64_t)(R_FS))<<32); -#if defined(DYNAREC) && defined(__arm__) +#if defined(DYNAREC) && defined(__aarch64__) if(db) { - sigcontext->uc_mcontext.gregs[X64_RAX] = p->uc_mcontext.arm_r4; - sigcontext->uc_mcontext.gregs[X64_RCX] = p->uc_mcontext.arm_r5; - sigcontext->uc_mcontext.gregs[X64_RDX] = p->uc_mcontext.arm_r6; - sigcontext->uc_mcontext.gregs[X64_RBX] = p->uc_mcontext.arm_r7; - sigcontext->uc_mcontext.gregs[X64_RSP] = p->uc_mcontext.arm_r8; - sigcontext->uc_mcontext.gregs[X64_RBP] = p->uc_mcontext.arm_r9; - sigcontext->uc_mcontext.gregs[X64_RSI] = p->uc_mcontext.arm_r10; - sigcontext->uc_mcontext.gregs[X64_RDI] = p->uc_mcontext.arm_fp; - sigcontext->uc_mcontext.gregs[X64_RIP] = getX86Address(db, (uintptr_t)pc); + sigcontext->uc_mcontext.gregs[X64_RAX] = p->uc_mcontext.regs[10]; + sigcontext->uc_mcontext.gregs[X64_RCX] = p->uc_mcontext.regs[11]; + sigcontext->uc_mcontext.gregs[X64_RDX] = p->uc_mcontext.regs[12]; + sigcontext->uc_mcontext.gregs[X64_RBX] = p->uc_mcontext.regs[13]; + sigcontext->uc_mcontext.gregs[X64_RSP] = p->uc_mcontext.regs[14]; + sigcontext->uc_mcontext.gregs[X64_RBP] = p->uc_mcontext.regs[15]; + sigcontext->uc_mcontext.gregs[X64_RSI] = p->uc_mcontext.regs[16]; + sigcontext->uc_mcontext.gregs[X64_RDI] = p->uc_mcontext.regs[17]; + sigcontext->uc_mcontext.gregs[X64_R8] = p->uc_mcontext.regs[18]; + sigcontext->uc_mcontext.gregs[X64_R9] = p->uc_mcontext.regs[19]; + sigcontext->uc_mcontext.gregs[X64_R10] = p->uc_mcontext.regs[20]; + sigcontext->uc_mcontext.gregs[X64_R11] = p->uc_mcontext.regs[21]; + sigcontext->uc_mcontext.gregs[X64_R12] = p->uc_mcontext.regs[22]; + sigcontext->uc_mcontext.gregs[X64_R13] = p->uc_mcontext.regs[23]; + sigcontext->uc_mcontext.gregs[X64_R14] = p->uc_mcontext.regs[24]; + 
sigcontext->uc_mcontext.gregs[X64_R15] = p->uc_mcontext.regs[25]; + sigcontext->uc_mcontext.gregs[X64_RIP] = getX64Address(db, (uintptr_t)pc); } #endif // get FloatPoint status @@ -637,7 +645,7 @@ void my_sigactionhandler_oldcode(int32_t sig, siginfo_t* info, void * ucntx, int new_ss->ss_flags = 0; } -void my_box86signalhandler(int32_t sig, siginfo_t* info, void * ucntx) +void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx) { // sig==SIGSEGV || sig==SIGBUS || sig==SIGILL here! int log_minimum = (my_context->is_sigaction[sig] && sig==SIGSEGV)?LOG_INFO:LOG_NONE; @@ -659,32 +667,47 @@ void my_box86signalhandler(int32_t sig, siginfo_t* info, void * ucntx) dynablock_t* db = NULL; int db_searched = 0; if ((sig==SIGSEGV) && (addr) && (info->si_code == SEGV_ACCERR) && (prot&PROT_DYNAREC)) { - if(box86_dynarec_smc) { - dynablock_t* db_pc = NULL; - db_pc = FindDynablockFromNativeAddress(pc); - if(db_pc) { - db = FindDynablockFromNativeAddress(addr); - db_searched = 1; - } - if(db_pc && db) { - if (db_pc == db) { - dynarec_log(LOG_NONE, "Warning: Access to protected %p from %p, inside same dynablock\n", addr, pc); - } - } - if(db && db->x86_addr>= addr && (db->x86_addr+db->x86_size)<addr) { - dynarec_log(LOG_INFO, "Warning, addr inside current dynablock!\n"); - } - } - dynarec_log(LOG_DEBUG, "Access to protected %p from %p, unprotecting memory (prot=%x)\n", addr, pc, prot); // access error, unprotect the block (and mark them dirty) if(prot&PROT_DYNAREC) // on heavy multi-thread program, the protection can already be gone... unprotectDB((uintptr_t)addr, 1); // unprotect 1 byte... But then, the whole page will be unprotected + // check if SMC inside block + if(!db_searched) { + db = FindDynablockFromNativeAddress(pc); + db_searched = 1; + } + if(db && (addr>=db->x64_addr && addr<(db->x64_addr+db->x64_size))) { + // dynablock got auto-dirty! need to get out of it!!! 
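            // The faulting write landed inside the dynablock that is currently
            // executing (self-modified code), so the block cannot simply be marked
            // dirty and resumed. Rebuild the x86_64 state from the native context
            // (RAX..R15 from x10..x25, flags from x26, RIP via getX64Address) and
            // longjmp back to the jump buffer set up around block execution instead
            // of returning into the now-stale code.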
+            emu_jmpbuf_t* ejb = GetJmpBuf();
+            if(ejb->jmpbuf_ok) {
+                ejb->emu->regs[_AX].q[0] = p->uc_mcontext.regs[10];
+                ejb->emu->regs[_CX].q[0] = p->uc_mcontext.regs[11];
+                ejb->emu->regs[_DX].q[0] = p->uc_mcontext.regs[12];
+                ejb->emu->regs[_BX].q[0] = p->uc_mcontext.regs[13];
+                ejb->emu->regs[_SP].q[0] = p->uc_mcontext.regs[14];
+                ejb->emu->regs[_BP].q[0] = p->uc_mcontext.regs[15];
+                ejb->emu->regs[_SI].q[0] = p->uc_mcontext.regs[16];
+                ejb->emu->regs[_DI].q[0] = p->uc_mcontext.regs[17];
+                ejb->emu->regs[_R8].q[0] = p->uc_mcontext.regs[18];
+                ejb->emu->regs[_R9].q[0] = p->uc_mcontext.regs[19];
+                ejb->emu->regs[_R10].q[0] = p->uc_mcontext.regs[20];
+                ejb->emu->regs[_R11].q[0] = p->uc_mcontext.regs[21];
+                ejb->emu->regs[_R12].q[0] = p->uc_mcontext.regs[22];
+                ejb->emu->regs[_R13].q[0] = p->uc_mcontext.regs[23];
+                ejb->emu->regs[_R14].q[0] = p->uc_mcontext.regs[24];
+                ejb->emu->regs[_R15].q[0] = p->uc_mcontext.regs[25];
+                ejb->emu->ip.q[0] = getX64Address(db, (uintptr_t)pc);
+                ejb->emu->eflags.x64 = p->uc_mcontext.regs[26];
+                dynarec_log(LOG_DEBUG, "Auto-SMC detected, getting out of current Dynablock!\n");
+                longjmp(ejb->jmpbuf, 2);
+            }
+            dynarec_log(LOG_INFO, "Warning, Auto-SMC (%p for db %p/%p) detected, but jmpbuffer not ready!\n", (void*)addr, db, (void*)db->x64_addr);
+        }
        // done
        if(prot&PROT_WRITE) return; // if there is no write permission, don't return and continue to program signal handling
    } else if ((sig==SIGSEGV) && (addr) && (info->si_code == SEGV_ACCERR) && (prot&(PROT_READ|PROT_WRITE))) {
        db = FindDynablockFromNativeAddress(pc);
        db_searched = 1;
-        if(db && db->x86_addr>= addr && (db->x86_addr+db->x86_size)<addr) {
+        if(db && db->x64_addr>= addr && (db->x64_addr+db->x64_size)<addr) {
            dynarec_log(LOG_INFO, "Warning, addr inside current dynablock!\n");
        }
        if(addr && pc && db) {
@@ -718,13 +741,13 @@ exit(-1);
    x64emu_t* emu = thread_get_emu();
    x64pc = R_RIP;
    rsp = (void*)R_RSP;
-#if defined(__arm__) && defined(DYNAREC)
-    if(db && p->uc_mcontext.arm_r0>0x10000) {
-        emu = (x64emu_t*)p->uc_mcontext.arm_r0;
+#if defined(__aarch64__) && defined(DYNAREC)
+    if(db && p->uc_mcontext.regs[0]>0x10000) {
+        emu = (x64emu_t*)p->uc_mcontext.regs[0];
    }
    if(db) {
        x64pc = getX64Address(db, (uintptr_t)pc);
-        rsp = (void*)p->uc_mcontext.arm_r8;
+        rsp = (void*)p->uc_mcontext.regs[10+_SP];
    }
#endif
    x64name = getAddrFunctionName(x64pc);
@@ -755,12 +778,12 @@ exit(-1);
#ifdef DYNAREC
        uint32_t hash = 0;
        if(db)
-            hash = X31_hash_code(db->x86_addr, db->x86_size);
+            hash = X31_hash_code(db->x64_addr, db->x64_size);
        printf_log(log_minimum, "%04d|%s @%p (%s) (x64pc=%p/%s:\"%s\", rsp=%p), for accessing %p (code=%d/prot=%x), db=%p(%p:%p/%p:%p/%s:%s, hash:%x/%x)",
            GetTID(), signame, pc, name, (void*)x64pc, elfname?elfname:"???", x64name?x64name:"???", rsp, addr, info->si_code, prot, db, db?db->block:0, db?(db->block+db->size):0,
-            db?db->x86_addr:0, db?(db->x86_addr+db->x86_size):0,
-            getAddrFunctionName((uintptr_t)(db?db->x86_addr:0)), (db?db->need_test:0)?"need_stest":"clean", db?db->hash:0, hash);
+            db?db->x64_addr:0, db?(db->x64_addr+db->x64_size):0,
+            getAddrFunctionName((uintptr_t)(db?db->x64_addr:0)), (db?db->need_test:0)?"need_stest":"clean", db?db->hash:0, hash);
#else
        printf_log(log_minimum, "%04d|%s @%p (%s) (x64pc=%p/%s:\"%s\", rsp=%p), for accessing %p (code=%d)",
            GetTID(), signame, pc, name, (void*)x64pc, elfname?elfname:"???", x64name?x64name:"???", rsp, addr, info->si_code);
#endif
@@ -788,7 +811,7 @@ void my_sigactionhandler(int32_t sig, siginfo_t* info, void * ucntx)
{
#ifdef DYNAREC
    ucontext_t *p = (ucontext_t *)ucntx;
-    void * pc = (void*)p->uc_mcontext.arm_pc;
+    void * pc = (void*)p->uc_mcontext.pc;
    dynablock_t* db = FindDynablockFromNativeAddress(pc);
#else
    void* db = NULL;
@@ -1135,13 +1158,13 @@ void init_signal_helper(box64context_t* context)
    }
    struct sigaction action = {0};
    action.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
-    action.sa_sigaction = my_box86signalhandler;
+    action.sa_sigaction = my_box64signalhandler;
    sigaction(SIGSEGV, &action, NULL);
    action.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
-    action.sa_sigaction = my_box86signalhandler;
+    action.sa_sigaction = my_box64signalhandler;
    sigaction(SIGBUS, &action, NULL);
    action.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
-    action.sa_sigaction = my_box86signalhandler;
+    action.sa_sigaction = my_box64signalhandler;
    sigaction(SIGILL, &action, NULL);

    pthread_once(&sigstack_key_once, sigstack_key_alloc);
diff --git a/src/libtools/threads.c b/src/libtools/threads.c
index 3e1a8eaa..386960db 100755
--- a/src/libtools/threads.c
+++ b/src/libtools/threads.c
@@ -299,7 +299,7 @@ EXPORT int my_pthread_create(x64emu_t *emu, void* t, void* attr, void* start_rou
    et->fnc = (uintptr_t)start_routine;
    et->arg = arg;
#ifdef DYNAREC
-    if(box86_dynarec) {
+    if(box64_dynarec) {
        // pre-creation of the JIT code for the entry point of the thread
        dynablock_t *current = NULL;
        DBGetBlock(emu, (uintptr_t)start_routine, 1, &current);
diff --git a/src/main.c b/src/main.c
index 095b61ad..0811136c 100755
--- a/src/main.c
+++ b/src/main.c
@@ -29,7 +29,16 @@ int box64_log = LOG_INFO; //LOG_NONE;
int box64_nobanner = 0;
int box64_dynarec_log = LOG_NONE;
int box64_pagesize;
+#ifdef DYNAREC
+int box64_dynarec = 1;
+int box64_dynarec_dump = 0;
+int box64_dynarec_forced = 0;
+int box64_dynarec_largest = 0;
+uintptr_t box64_nodynarec_start = 0;
+uintptr_t box64_nodynarec_end = 0;
+#else //DYNAREC
int box64_dynarec = 0;
+#endif
int dlsym_error = 0;
int trace_xmm = 0;
int trace_emm = 0;
@@ -128,6 +137,61 @@ void LoadLogEnv()
        if(!box64_nobanner)
            printf_log(LOG_INFO, "Debug level is %d\n", box64_log);
    }
+#ifdef DYNAREC
+    p = getenv("BOX64_DYNAREC_DUMP");
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[1]<='1')
+                box64_dynarec_dump = p[0]-'0';
+        }
+        if (box64_dynarec_dump) printf_log(LOG_INFO, "Dynarec blocks are dumped%s\n", (box64_dynarec_dump>1)?" in color":"");
+    }
+    p = getenv("BOX64_DYNAREC_LOG");
+    if(p) {
+        if(strlen(p)==1) {
+            if((p[0]>='0'+LOG_NONE) && (p[0]<='0'+LOG_DUMP))
+                box64_dynarec_log = p[0]-'0';
+        } else {
+            if(!strcasecmp(p, "NONE"))
+                box64_dynarec_log = LOG_NONE;
+            else if(!strcasecmp(p, "INFO"))
+                box64_dynarec_log = LOG_INFO;
+            else if(!strcasecmp(p, "DEBUG"))
+                box64_dynarec_log = LOG_DEBUG;
+            else if(!strcasecmp(p, "VERBOSE"))
+                box64_dynarec_log = LOG_DUMP;
+        }
+        printf_log(LOG_INFO, "Dynarec log level is %d\n", box64_dynarec_log);
+    }
+    p = getenv("BOX64_DYNAREC");
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[1]<='1')
+                box64_dynarec = p[0]-'0';
+        }
+        printf_log(LOG_INFO, "Dynarec is %s\n", box64_dynarec?"On":"Off");
+    }
+    p = getenv("BOX64_DYNAREC_FORCED");
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[1]<='1')
+                box64_dynarec_forced = p[0]-'0';
+        }
+        if(box64_dynarec_forced)
+            printf_log(LOG_INFO, "Dynarec is Forced on all addresses\n");
+    }
+    p = getenv("BOX64_NODYNAREC");
+    if(p) {
+        if (strchr(p,'-')) {
+            if(sscanf(p, "%ld-%ld", &box64_nodynarec_start, &box64_nodynarec_end)!=2) {
+                if(sscanf(p, "0x%lX-0x%lX", &box64_nodynarec_start, &box64_nodynarec_end)!=2)
+                    sscanf(p, "%lx-%lx", &box64_nodynarec_start, &box64_nodynarec_end);
+            }
+            printf_log(LOG_INFO, "No Dynablock creation that start in %p - %p range\n", (void*)box64_nodynarec_start, (void*)box64_nodynarec_end);
+        }
+    }
+
+#endif
#ifdef HAVE_TRACE
    p = getenv("BOX64_TRACE_XMM");
    if(p) {
@@ -149,6 +213,17 @@
        start_cnt = strtoll(p, &p2, 10);
        printf_log(LOG_INFO, "Will start trace only after %lu instructions\n", start_cnt);
    }
+#ifdef DYNAREC
+    p = getenv("BOX64_DYNAREC_TRACE");
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[1]<='0'+1)
+                box64_dynarec_trace = p[0]-'0';
+            if(box64_dynarec_trace)
+                printf_log(LOG_INFO, "Dynarec generated code will also print a trace\n");
+        }
+    }
+#endif
#endif
    // grab BOX64_TRACE_FILE envvar, and change %pid to actual pid is present in the name
    openFTrace();
@@ -345,6 +420,11 @@ void PrintHelp() {
    printf(" BOX64_LD_LIBRARY_PATH is the box64 version LD_LIBRARY_PATH (default is '.:lib')\n");
    printf(" BOX64_LOG with 0/1/2/3 or NONE/INFO/DEBUG/DUMP to set the printed debug info\n");
    printf(" BOX64_NOBANNER with 0/1 to enable/disable the printing of box64 version and build at start\n");
+#ifdef DYNAREC
+    printf(" BOX64_DYNAREC_LOG with 0/1/2/3 or NONE/INFO/DEBUG/DUMP to set the printed dynarec info\n");
+    printf(" BOX64_DYNAREC with 0/1 to disable or enable Dynarec (On by default)\n");
+    printf(" BOX64_NODYNAREC with address interval (0x1234-0x4567) to forbid dynablock creation in the interval specified\n");
+#endif
#ifdef HAVE_TRACE
    printf(" BOX64_TRACE with 1 to enable x86_64 execution trace\n");
    printf(" or with XXXXXX-YYYYYY to enable x86_64 execution trace only between address\n");
@@ -353,6 +433,9 @@ void PrintHelp() {
    printf(" BOX64_TRACE_EMM with 1 to enable dump of MMX registers along with regular registers\n");
    printf(" BOX64_TRACE_XMM with 1 to enable dump of SSE registers along with regular registers\n");
    printf(" BOX64_TRACE_START with N to enable trace after N instructions\n");
+#ifdef DYNAREC
+    printf(" BOX64_DYNAREC_TRACE with 0/1 to disable or enable Trace on generated code too\n");
+#endif
#endif
    printf(" BOX64_TRACE_FILE with FileName to redirect logs in a file");
    printf(" BOX64_DLSYM_ERROR with 1 to log dlsym errors\n");
@@ -765,7 +848,7 @@ int main(int argc, const char **argv, const char **env) {
    if(ElfCheckIfUseTCMallocMinimal(elf_header)) {
        if(!box64_tcmalloc_minimal) {
            // need to reload with tcmalloc_minimal as a LD_PRELOAD!
-            printf_log(LOG_INFO, "BOX86: tcmalloc_minimal.so.4 used, reloading box64 with the lib preladed\n");
+            printf_log(LOG_INFO, "BOX64: tcmalloc_minimal.so.4 used, reloading box64 with the lib preladed\n");
            // need to get a new envv variable. so first count it and check if LD_PRELOAD is there
            int preload=(getenv("LD_PRELOAD"))?1:0;
            int nenv = 0;
@@ -805,7 +888,7 @@ int main(int argc, const char **argv, const char **env) {
            if(execve(newargv[0], newargv, newenv)<0)
                printf_log(LOG_NONE, "Failed to relaunch, error is %d/%s\n", errno, strerror(errno));
        } else {
-            printf_log(LOG_INFO, "BOX86: Using tcmalloc_minimal.so.4, and it's in the LD_PRELOAD command\n");
+            printf_log(LOG_INFO, "BOX64: Using tcmalloc_minimal.so.4, and it's in the LD_PRELOAD command\n");
        }
    }
    // get and alloc stack size and align
diff --git a/src/tools/bridge.c b/src/tools/bridge.c
index 3724e0b3..a89f32ca 100755
--- a/src/tools/bridge.c
+++ b/src/tools/bridge.c
@@ -5,7 +5,7 @@
#include <dlfcn.h>
#include <pthread.h>

-//#include "custommem.h"
+#include "custommem.h"
#include "bridge.h"
#include "bridge_private.h"
#include "khash.h"
@@ -146,7 +146,8 @@ uintptr_t AddAutomaticBridge(x64emu_t* emu, bridge_t* bridge, wrapper_t w, void*
        addAlternate(fnc, (void*)ret);
#ifdef DYNAREC
        // now, check if dynablock at native address exist
-        DBAlternateBlock(emu, (uintptr_t)fnc, ret);
+        if(box64_dynarec)
+            DBAlternateBlock(emu, (uintptr_t)fnc, ret);
#endif
    }
    return ret;