diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2021-03-14 17:58:04 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2021-03-14 17:58:04 +0100 |
| commit | 4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6 (patch) | |
| tree | e89e9892fa166aa348b8c9f902de7428e875c7bc /src/dynarec | |
| parent | 3dda84e58b148f92b2bb4d94caacc84011fa3919 (diff) | |
| download | box64-4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6.tar.gz box64-4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6.zip | |
[DYNAREC] Added Basic blocks for dynarec
Diffstat (limited to 'src/dynarec')
| -rwxr-xr-x | src/dynarec/arm64_epilog.S | 81 | ||||
| -rwxr-xr-x | src/dynarec/arm64_lock_helper.S | 87 | ||||
| -rwxr-xr-x | src/dynarec/arm64_lock_helper.h | 31 | ||||
| -rwxr-xr-x | src/dynarec/arm64_next.S | 47 | ||||
| -rwxr-xr-x | src/dynarec/arm64_printer.c | 14 | ||||
| -rw-r--r-- | src/dynarec/arm64_printer.h | 6 | ||||
| -rwxr-xr-x | src/dynarec/arm64_prolog.S | 53 | ||||
| -rwxr-xr-x | src/dynarec/dynablock.c | 422 | ||||
| -rwxr-xr-x | src/dynarec/dynablock_private.h | 36 | ||||
| -rwxr-xr-x | src/dynarec/dynarec.c | 38 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64.c | 449 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_functions.c | 354 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_functions.h | 61 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_arm64_private.h | 56 | ||||
| -rwxr-xr-x | src/dynarec/dynarec_private.h | 35 |
15 files changed, 1751 insertions, 19 deletions
diff --git a/src/dynarec/arm64_epilog.S b/src/dynarec/arm64_epilog.S new file mode 100755 index 00000000..af39c1ba --- /dev/null +++ b/src/dynarec/arm64_epilog.S @@ -0,0 +1,81 @@ +//arm64 epilog for dynarec +//Write the emulated registers (x10..x27) back to emu, then restore the host registers saved on the stack +//called with pointer to emu in x0 +//stack layout must match arm64_prolog: 18 saved registers, then the lr/fp pair + +.text +.align 4 + +.global arm64_epilog +arm64_epilog: + //update register -> emu + str x10, [x0, (8 * 0)] + str x11, [x0, (8 * 1)] + str x12, [x0, (8 * 2)] + str x13, [x0, (8 * 3)] + str x14, [x0, (8 * 4)] + str x15, [x0, (8 * 5)] + str x16, [x0, (8 * 6)] + str x17, [x0, (8 * 7)] + str x18, [x0, (8 * 8)] + str x19, [x0, (8 * 9)] + str x20, [x0, (8 * 10)] + str x21, [x0, (8 * 11)] + str x22, [x0, (8 * 12)] + str x23, [x0, (8 * 13)] + str x24, [x0, (8 * 14)] + str x25, [x0, (8 * 15)] + str x26, [x0, (8 * 16)] + str x27, [x0, (8 * 17)] // put back reg value in emu, including EIP (so x25 must be EIP now) + //restore all used register + //vpop {d8-d15} + ldr x10, [sp, (8 * 0)] + ldr x11, [sp, (8 * 1)] + ldr x12, [sp, (8 * 2)] + ldr x13, [sp, (8 * 3)] + ldr x14, [sp, (8 * 4)] + ldr x15, [sp, (8 * 5)] + ldr x16, [sp, (8 * 6)] + ldr x17, [sp, (8 * 7)] + ldr x18, [sp, (8 * 8)] + ldr x19, [sp, (8 * 9)] + ldr x20, [sp, (8 * 10)] + ldr x21, [sp, (8 * 11)] + ldr x22, [sp, (8 * 12)] + ldr x23, [sp, (8 * 13)] + ldr x24, [sp, (8 * 14)] + ldr x25, [sp, (8 * 15)] + ldr x26, [sp, (8 * 16)] + ldr x27, [sp, (8 * 17)] + add sp, sp, (8 * 18) + ldp lr, fp, [sp], 16 // reload saved lr/fp, then pop them (post-index, mirrors the push in arm64_prolog) + //end, return... 
+ ret + + +.global arm64_epilog_fast +arm64_epilog_fast: + //restore all used register without touching emu (slots match the x10..x27 layout saved by arm64_prolog) + //vpop {d8-d15} + ldr x10, [sp, (8 * 0)] + ldr x11, [sp, (8 * 1)] + ldr x12, [sp, (8 * 2)] + ldr x13, [sp, (8 * 3)] + ldr x14, [sp, (8 * 4)] + ldr x15, [sp, (8 * 5)] + ldr x16, [sp, (8 * 6)] + ldr x17, [sp, (8 * 7)] + ldr x18, [sp, (8 * 8)] + ldr x19, [sp, (8 * 9)] + ldr x20, [sp, (8 * 10)] + ldr x21, [sp, (8 * 11)] + ldr x22, [sp, (8 * 12)] + ldr x23, [sp, (8 * 13)] + ldr x24, [sp, (8 * 14)] + ldr x25, [sp, (8 * 15)] + ldr x26, [sp, (8 * 16)] + ldr x27, [sp, (8 * 17)] + add sp, sp, (8 * 18) + ldp lr, fp, [sp], 16 // reload saved lr/fp, then pop them (post-index, mirrors the push in arm64_prolog) + //end, return... + ret diff --git a/src/dynarec/arm64_lock_helper.S b/src/dynarec/arm64_lock_helper.S new file mode 100755 index 00000000..51b43316 --- /dev/null +++ b/src/dynarec/arm64_lock_helper.S @@ -0,0 +1,87 @@ +//arm64 lock helper +//there are 2 parts: read (load-exclusive) and write (store-exclusive) +// write return 0 on success, 1 on fail (value has been changed) + +.text +.align 4 + +.global arm64_lock_read_b +.global arm64_lock_write_b +.global arm64_lock_read_h +.global arm64_lock_write_h +.global arm64_lock_read_d +.global arm64_lock_write_d +.global arm64_lock_read_dd +.global arm64_lock_write_dd +.global arm64_lock_xchg +.global arm64_lock_storeifnull + + +arm64_lock_read_b: + // address is x0, return is x0 + ldaxrb w0, [x0] + ret + +arm64_lock_write_b: + // address is x0, value is x1, return is x0 + mov x2, x0 + stlxrb w0, w1, [x2] + ret + +arm64_lock_read_h: + // address is x0, return is x0 + ldaxrh w0, [x0] + ret + +arm64_lock_write_h: + // address is x0, value is x1, return is x0 + mov x2, x0 + stlxrh w0, w1, [x2] + ret + +arm64_lock_read_d: + // address is x0, return is x0. 
NOTE(review): plain load for now, the exclusive form below is disabled + //ldaxr w0, [x0] + ldr w0,[x0] + ret + +arm64_lock_write_d: + // address is x0, value is w1, return is x0. NOTE(review): plain store that always reports success (0) + mov x2, x0 + //stlxr w0, w1, [x2] + str w1, [x2] + mov w0, 0 + ret + +arm64_lock_read_dd: + // address is x0, return is x0 + ldaxr x0, [x0] + ret + +arm64_lock_write_dd: + // address is x0, value 
is x1, return is x0 + mov x2, x0 + stlxr w0, x1, [x2] + ret + +arm64_lock_xchg: + // address is x0, value is x1, return old value in x0 (full 64 bits: the C prototype exchanges an uintptr_t) + ldaxr x2, [x0] + stlxr w3, x1, [x0] + cmp w3, #1 + beq arm64_lock_xchg + mov x0, x2 + ret + +arm64_lock_storeifnull: + // address is x0, value is x1, x1 store to x0 only if [x0] is 0. return new [x0] value (so x1 or old value) + ldaxr x2, [x0] + cmp x2, #0 + bne arm64_lock_storeifnull_exit + mov x2, x1 + stlxr w3, x2, [x0] + cmp w3, #1 + beq arm64_lock_storeifnull +arm64_lock_storeifnull_exit: + mov x0, x2 + ret diff --git a/src/dynarec/arm64_lock_helper.h b/src/dynarec/arm64_lock_helper.h new file mode 100755 index 00000000..a6879bea --- /dev/null +++ b/src/dynarec/arm64_lock_helper.h @@ -0,0 +1,31 @@ +#ifndef __ARM64_LOCK_HELPER__H__ +#define __ARM64_LOCK_HELPER__H__ +#include <stdint.h> + +// LDAXRB of ADDR +extern uint8_t arm64_lock_read_b(void* addr); +// STLXRB of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_b(void* addr, uint8_t val); + +// LDAXRH of ADDR +extern uint16_t arm64_lock_read_h(void* addr); +// STLXRH of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_h(void* addr, uint16_t val); + +// LDAXR of ADDR +extern uint32_t arm64_lock_read_d(void* addr); +// STLXR of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_d(void* addr, uint32_t val); + +// LDAXRD of ADDR +extern uint64_t arm64_lock_read_dd(void* addr); +// STLXR of ADDR, return 0 if ok, 1 if not +extern int arm64_lock_write_dd(void* addr, uint64_t val); + +// Atomically exchange value at [p] with val, return old [p] +extern uintptr_t arm64_lock_xchg(void* p, uintptr_t val); + +// Atomically store value to [p] only if [p] is NULL. 
Return new [p] value (so val or old) +extern void* arm64_lock_storeifnull(void*p, void* val); + +#endif //__ARM64_LOCK_HELPER__H__ \ No newline at end of file diff --git a/src/dynarec/arm64_next.S b/src/dynarec/arm64_next.S new file mode 100755 index 00000000..2410750c --- /dev/null +++ b/src/dynarec/arm64_next.S @@ -0,0 +1,47 @@ +//arm64 update linker table for dynarec +//called with pointer to emu as 1st parameter +//and address of table to as 2nd parameter +//ip is at r12 + +.text +.align 4 + +.extern LinkNext + +.global arm64_next +arm64_next: + // emu is x0 + // don't put back reg value in emu, faster but more tricky to debug + // IP address is x1 + sub sp, sp, (8 * 12) // 11 slots used, 1 slot of padding so sp stays 16-byte aligned + str x0, [sp, (8 * 0)] + str x1, [sp, (8 * 1)] + str x10, [sp, (8 * 2)] + str x11, [sp, (8 * 3)] + str x12, [sp, (8 * 4)] + str x13, [sp, (8 * 5)] + str x14, [sp, (8 * 6)] + str x15, [sp, (8 * 7)] + str x16, [sp, (8 * 8)] + str x17, [sp, (8 * 9)] + str x18, [sp, (8 * 10)] + // call the function + bl LinkNext + // preserve return value + mov x3, x0 + // pop regs + ldr x0, [sp, (8 * 0)] + ldr x1, [sp, (8 * 1)] + ldr x10, [sp, (8 * 2)] + ldr x11, [sp, (8 * 3)] + ldr x12, [sp, (8 * 4)] + ldr x13, [sp, (8 * 5)] + ldr x14, [sp, (8 * 6)] + ldr x15, [sp, (8 * 7)] + ldr x16, [sp, (8 * 8)] + ldr x17, [sp, (8 * 9)] + ldr x18, [sp, (8 * 10)] + add sp, sp, (8 * 12) + // return offset is jump address + br x3 + diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c new file mode 100755 index 00000000..49539a1a --- /dev/null +++ b/src/dynarec/arm64_printer.c @@ -0,0 +1,14 @@ +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <stdio.h> + +#include "arm64_printer.h" + +const char* arm64_print(uint32_t opcode) +{ + static char buff[200]; // returned to the caller, so not reentrant + + snprintf(buff, sizeof(buff), "0x%08X ???", opcode); + return buff; +} \ No newline at end of file diff --git 
a/src/dynarec/arm64_printer.h b/src/dynarec/arm64_printer.h new file mode 100644 index 00000000..6fe21c33 --- /dev/null +++ 
b/src/dynarec/arm64_printer.h @@ -0,0 +1,6 @@ +#ifndef _ARM_PRINTER_H_ +#define _ARM_PRINTER_H_ +#include <stdint.h> + +const char* arm64_print(uint32_t opcode); +#endif //_ARM_PRINTER_H_ diff --git a/src/dynarec/arm64_prolog.S b/src/dynarec/arm64_prolog.S new file mode 100755 index 00000000..f480f2ea --- /dev/null +++ b/src/dynarec/arm64_prolog.S @@ -0,0 +1,53 @@ +//arm64 prologue for dynarec +//Save the host registers on the stack, load the emulated registers from emu +//called with pointer to emu as 1st parameter +//and address to jump to as 2nd parameter + +.text +.align 4 + +.global arm64_prolog +arm64_prolog: + //save all 18 used register + stp lr, fp, [sp, -16]! // push lr/fp (pre-decrement: never write above the incoming sp) + sub sp, sp, (8 * 18) + str x10, [sp, (8 * 0)] + str x11, [sp, (8 * 1)] + str x12, [sp, (8 * 2)] + str x13, [sp, (8 * 3)] + str x14, [sp, (8 * 4)] + str x15, [sp, (8 * 5)] + str x16, [sp, (8 * 6)] + str x17, [sp, (8 * 7)] + str x18, [sp, (8 * 8)] + str x19, [sp, (8 * 9)] + str x20, [sp, (8 * 10)] + str x21, [sp, (8 * 11)] + str x22, [sp, (8 * 12)] + str x23, [sp, (8 * 13)] + str x24, [sp, (8 * 14)] + str x25, [sp, (8 * 15)] + str x26, [sp, (8 * 16)] + str x27, [sp, (8 * 17)] + //vpush {d8-d15} // save NEON regs? 
+ //setup emu -> register + ldr x10, [x0, (8 * 0)] + ldr x11, [x0, (8 * 1)] + ldr x12, [x0, (8 * 2)] + ldr x13, [x0, (8 * 3)] + ldr x14, [x0, (8 * 4)] + ldr x15, [x0, (8 * 5)] + ldr x16, [x0, (8 * 6)] + ldr x17, [x0, (8 * 7)] + ldr x18, [x0, (8 * 8)] + ldr x19, [x0, (8 * 9)] + ldr x20, [x0, (8 * 10)] + ldr x21, [x0, (8 * 11)] + ldr x22, [x0, (8 * 12)] + ldr x23, [x0, (8 * 13)] + ldr x24, [x0, (8 * 14)] + ldr x25, [x0, (8 * 15)] + ldr x26, [x0, (8 * 16)] + ldr x27, [x0, (8 * 17)] + //jump to function + br x1 diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c new file mode 100755 index 00000000..2ab39d09 --- /dev/null +++ b/src/dynarec/dynablock.c @@ -0,0 +1,422 @@ +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <errno.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "tools/bridge_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynablock.h" +#include "dynablock_private.h" +#include "dynarec_private.h" +#include "elfloader.h" +#ifdef ARM64 +#include "dynarec_arm64.h" +#include "arm64_lock_helper.h" +#else +#error Unsupported architecture! 
+#endif +#include "custommem.h" +#include "khash.h" + +// NOTE(review): FreeRangeDynablock stores (uintptr_t)db as key in this map; confirm a 32-bit INT key cannot truncate 64-bit pointers here +KHASH_MAP_INIT_INT(dynablocks, dynablock_t*) + +// X31 hash (h = h*31 + byte) of the len bytes at addr; returns 0 when len is not positive. +// Done in unsigned arithmetic: the wrap-around is then well defined and gives the same 32-bit value as before. +uint32_t X31_hash_code(void* addr, int len) +{ + if(len<=0) return 0; + const uint8_t* p = (const uint8_t*)addr; + uint32_t h = *p; + for (--len, ++p; len; --len, ++p) h = (h << 5) - h + *p; + return h; +} + +// Allocate a dynablocklist describing [text, text+textsz). When direct is set, the direct lookup array +// (one slot per byte of text) is allocated too; failing to get that array is only a warning. +// Returns NULL when textsz is 0 or when the list itself cannot be allocated. +dynablocklist_t* NewDynablockList(uintptr_t text, int textsz, int direct) +{ + if(!textsz) { + printf_log(LOG_NONE, "Error, creating a NULL sized Dynablock\n"); + return NULL; + } + dynablocklist_t* ret = (dynablocklist_t*)calloc(1, sizeof(dynablocklist_t)); + if(!ret) { + printf_log(LOG_NONE, "Error, fail to allocate Dynablocklist for %p\n", (void*)text); + return NULL; + } + ret->text = text; + ret->textsz = textsz; + if(direct) { + ret->direct = (dynablock_t**)calloc(textsz, sizeof(dynablock_t*)); + if(!ret->direct) {printf_log(LOG_NONE, "Warning, fail to create direct block for dynablock @%p\n", (void*)text);} + } + dynarec_log(LOG_DEBUG, "New Dynablocklist %p, from %p->%p\n", ret, (void*)text, (void*)(text+textsz)); + return ret; +} + +void FreeDynablock(dynablock_t* db) +{ + if(db) { + dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p father=%p, with %d son(s) already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size, db->father, db->sons_size, db->gone); + if(db->gone) + return; // already in the process of deletion! 
+ db->done = 0; + db->gone = 1; + // remove from direct if there + uintptr_t startdb = db->parent->text; + uintptr_t enddb = db->parent->text + db->parent->textsz; + if(db->parent->direct) { + uintptr_t addr = (uintptr_t)db->x64_addr; + if(addr>=startdb && addr<enddb) + db->parent->direct[addr-startdb] = NULL; + } + // remove jumptable + setJumpTableDefault64(db->x64_addr); + // remove and free the sons + for (int i=0; i<db->sons_size; ++i) { + dynablock_t *son = (dynablock_t*)arm64_lock_xchg(&db->sons[i], 0); + FreeDynablock(son); + } + // only the father free the DynarecMap + if(!db->father) { + dynarec_log(LOG_DEBUG, " -- FreeDyrecMap(%p, %d)\n", db->block, db->size); + FreeDynarecMap(db, (uintptr_t)db->block, db->size); + } + free(db->sons); + free(db->instsize); + free(db); + } +} + +void FreeDynablockList(dynablocklist_t** dynablocks) +{ + if(!dynablocks) + return; + if(!*dynablocks) + return; + dynarec_log(LOG_DEBUG, "Free Dynablocklist %p, with Direct Blocks %p\n", *dynablocks, (*dynablocks)->direct); + if((*dynablocks)->direct) { + for (int i=0; i<(*dynablocks)->textsz; ++i) { + if((*dynablocks)->direct[i] && !(*dynablocks)->direct[i]->father) + FreeDynablock((*dynablocks)->direct[i]); + } + free((*dynablocks)->direct); + } + (*dynablocks)->direct = NULL; + + free(*dynablocks); + *dynablocks = NULL; +} + +void MarkDynablock(dynablock_t* db) +{ + if(db) { + if(db->father) + db = db->father; // mark only father + if(db->need_test) + return; // already done + db->need_test = 1; // test only blocks that can be marked (and so deleted) + setJumpTableDefault64(db->x64_addr); + for(int i=0; i<db->sons_size; ++i) + setJumpTableDefault64(db->sons[i]->x64_addr); + } +} + +uintptr_t StartDynablockList(dynablocklist_t* db) +{ + if(db) + return db->text; + return 0; +} +uintptr_t EndDynablockList(dynablocklist_t* db) +{ + if(db) + return db->text+db->textsz-1; + return 0; +} + +int IntervalIntersects(uintptr_t start1, uintptr_t end1, uintptr_t start2, uintptr_t end2) +{ 
+ if(start1 > end2 || start2 > end1) + return 0; + return 1; +} + +// Mark every block registered in the direct map at an address of [addr, addr+size-1] (clamped to this list's text range). +// start/end are inclusive bounds, so the scan includes end itself. +void MarkDirectDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size) +{ + if(!dynablocks) + return; + if(!dynablocks->direct) + return; + uintptr_t startdb = dynablocks->text; + uintptr_t enddb = startdb + dynablocks->textsz -1; + uintptr_t start = addr; + uintptr_t end = addr+size-1; + if(start<startdb) + start = startdb; + if(end>enddb) + end = enddb; + dynablock_t *db; + if(end>=startdb && start<=enddb) + for(uintptr_t i = start; i<=end; ++i) + if((db=dynablocks->direct[i-startdb])) + if(IntervalIntersects((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr+db->x64_size-1, addr, addr+size+1)) + MarkDynablock(db); +} + +// Detach from the direct map every entry of [addr, addr+size) and free the corresponding father blocks owned by this list. +void FreeRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size) +{ + if(!dynablocks) + return; + + if(dynablocks->direct) { + dynablock_t* db; + int ret; + khint_t k; + kh_dynablocks_t *blocks = kh_init(dynablocks); + // copy in a temporary list + if(dynablocks->direct) { + uintptr_t startdb = dynablocks->text; + uintptr_t enddb = startdb + dynablocks->textsz; + uintptr_t start = addr; + uintptr_t end = addr+size; + if(start<startdb) + start = startdb; + if(end>enddb) + end = enddb; + if(end>startdb && start<enddb) + for(uintptr_t i = start; i<end; ++i) { + db = (dynablock_t*)arm64_lock_xchg(&dynablocks->direct[i-startdb], 0); + if(db) { + if(db->father) + db = db->father; + if(db->parent==dynablocks) { + k = kh_put(dynablocks, blocks, (uintptr_t)db, &ret); + kh_value(blocks, k) = db; + } + } + } + } + // purge the list + kh_foreach_value(blocks, db, + FreeDynablock(db); + ); + kh_destroy(dynablocks, blocks); + } +} +void MarkRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size) +{ + if(!dynablocks) + return; + if(dynablocks->direct) { + uintptr_t new_addr = addr - dynablocks->maxsz; + uintptr_t new_size = size + dynablocks->maxsz; + MarkDirectDynablock(dynablocks, new_addr, new_size); + // the blocks check before + for(int 
idx=(new_addr)>>DYNAMAP_SHIFT; idx<(addr>>DYNAMAP_SHIFT); ++idx) + MarkDirectDynablock(getDB(idx), new_addr, new_size); + } +} + +dynablock_t* FindDynablockDynablocklist(void* addr, kh_dynablocks_t* dynablocks) +{ + if(!dynablocks) + return NULL; + dynablock_t* db; + kh_foreach_value(dynablocks, db, + const uintptr_t s = (uintptr_t)db->block; + const uintptr_t e = (uintptr_t)db->block+db->size; + if((uintptr_t)addr>=s && (uintptr_t)addr<e) + return db->father?db->father:db; + ) + return NULL; +} + +static dynablocklist_t* getDBFromAddress(uintptr_t addr) +{ + const uintptr_t idx = (addr>>DYNAMAP_SHIFT); + return getDB(idx); +} + +dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* created) +{ + if(!dynablocks) { + dynarec_log(LOG_INFO, "Warning: Ask to create a dynablock with a NULL dynablocklist (addr=%p)\n", (void*)addr); + *created = 0; + return NULL; + } + if((addr<dynablocks->text) || (addr>=(dynablocks->text+dynablocks->textsz))) { + // this should be useless + //dynarec_log(LOG_INFO, "Warning: Refused to create a Direct Block that is out-of-bound: dynablocks=%p (%p:%p), addr=%p\n", dynablocks, (void*)(dynablocks->text), (void*)(dynablocks->text+dynablocks->textsz), (void*)addr); + //*created = 0; + //return NULL; + return AddNewDynablock(getDBFromAddress(addr), addr, created); + } + dynablock_t* block = NULL; + // first, check if it exist in direct access mode + if(dynablocks->direct) { + block = dynablocks->direct[addr-dynablocks->text]; + if(block) { + dynarec_log(LOG_DUMP, "Block already exist in Direct Map\n"); + *created = 0; + return block; + } + } + + if (!*created) + return block; + + if(!dynablocks->direct) { + dynablock_t** p = (dynablock_t**)calloc(dynablocks->textsz, sizeof(dynablock_t*)); + if(arm64_lock_storeifnull(&dynablocks->direct, p)!=p) + free(p); // someone already create the direct array, too late... 
+ } + + // create and add new block + dynarec_log(LOG_DUMP, "Ask for DynaRec Block creation @%p\n", (void*)addr); + + block = (dynablock_t*)calloc(1, sizeof(dynablock_t)); + block->parent = dynablocks; + dynablock_t* tmp = (dynablock_t*)arm64_lock_storeifnull(&dynablocks->direct[addr-dynablocks->text], block); + if(tmp != block) { + // a block appeard! + free(block); + *created = 0; + return tmp; + } + + *created = 1; + return block; +} + +/* + return NULL if block is not found / cannot be created. + Don't create if create==0 +*/ +static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr, int create, dynablock_t* current) +{ + // try the quickest way first: get parent of current and check if ok! + dynablocklist_t *dynablocks = NULL; + dynablock_t* block = NULL; + if(current) { + dynablocks = current->parent; + if(dynablocks && !(addr>=dynablocks->text && addr<(dynablocks->text+dynablocks->textsz))) + dynablocks = NULL; + } + // nope, lets do the long way + if(!dynablocks) { + dynablocks = getDBFromAddress(addr); + if(!dynablocks) { + dynablocks = GetDynablocksFromAddress(emu->context, addr); + if(!dynablocks) + return NULL; + } + } + // check direct first, without lock + if(dynablocks->direct/* && (addr>=dynablocks->text) && (addr<(dynablocks->text+dynablocks->textsz))*/) + if((block = dynablocks->direct[addr-dynablocks->text])) + return block; + + int created = create; + block = AddNewDynablock(dynablocks, addr, &created); + if(!created) + return block; // existing block... + + if(box64_dynarec_dump) + pthread_mutex_lock(&my_context->mutex_dyndump); + // fill the block + block->x64_addr = (void*)addr; + if(0/*!FillBlock64(block, filladdr)*/) { + void* old = (void*)arm64_lock_xchg(&dynablocks->direct[addr-dynablocks->text], 0); + if(old!=block && old) {// put it back in place, strange things are happening here! 
+ dynarec_log(LOG_INFO, "Warning, a wild block appeared at %p: %p\n", (void*)addr, old); + arm64_lock_xchg(&dynablocks->direct[addr-dynablocks->text], (uintptr_t)old); + } + free(block); + block = NULL; + } + if(box64_dynarec_dump) + pthread_mutex_unlock(&my_context->mutex_dyndump); + // check size + if(block && block->x64_size) { + int blocksz = block->x64_size; + if(dynablocks->maxsz<blocksz) { + dynablocks->maxsz = blocksz; + for(int idx=(addr>>DYNAMAP_SHIFT)+1; idx<=((addr+blocksz)>>DYNAMAP_SHIFT); ++idx) { + dynablocklist_t* dblist; + if((dblist = getDB(idx))) + if(dblist->maxsz<blocksz) + dblist->maxsz = blocksz; + } + } + lockDB(); + protectDBnolock((uintptr_t)block->x64_addr, block->x64_size); + // fill-in jumptable + addJumpTableIfDefault64(block->x64_addr, block->block); + for(int i=0; i<block->sons_size; ++i) + addJumpTableIfDefault64(block->sons[i]->x64_addr, block->sons[i]->block); + unlockDB(); + } + + dynarec_log(LOG_DEBUG, " --- DynaRec Block %s @%p:%p (%p, 0x%x bytes, with %d son(s))\n", created?"created":"recycled", (void*)addr, (void*)(addr+((block)?block->x64_size:0)), (block)?block->block:0, (block)?block->size:0, (block)?block->sons_size:0); + + return block; +} + +// Get (and create if create!=0) the block for addr. A block flagged need_test is re-hashed first: +// if the x64 code changed, its father is freed and the block is looked up again, else the flag is cleared and the jumptable refilled. +// *current is reset to NULL when it is the freed father or one of its sons, so it is never used after being freed. +dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, dynablock_t** current) +{ + dynablock_t *db = internalDBGetBlock(emu, addr, addr, create, *current); + if(db && db->done && db->block && (db->need_test || (db->father && db->father->need_test))) { + dynablock_t *father = db->father?db->father:db; + uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size); + if(hash!=father->hash) { + dynarec_log(LOG_DEBUG, "Invalidating block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size, hash, father->hash, father->sons_size, (void*)addr); + // no more current if it gets invalidated too + if(*current && ((*current)==father || (*current)->father==father)) + 
*current = NULL; + // Free father, it's now invalid! + FreeDynablock(father); + // start again... (will create a new block) + db = internalDBGetBlock(emu, addr, addr, create, *current); + } else { + father->need_test = 0; + lockDB(); + protectDBnolock((uintptr_t)father->x64_addr, father->x64_size); + // fill back jumptable + addJumpTableIfDefault64(father->x64_addr, father->block); + for(int i=0; i<father->sons_size; ++i) + addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block); + unlockDB(); + } + } + return db; +} + +dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr) +{ + dynarec_log(LOG_DEBUG, "Creating AlternateBlock at %p for %p\n", (void*)addr, (void*)filladdr); + int create = 1; + dynablock_t *db = internalDBGetBlock(emu, addr, filladdr, create, NULL); + if(db && db->done && db->block && (db->need_test || (db->father && db->father->need_test))) { + dynablock_t *father = db->father?db->father:db; + uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size); + if(hash!=father->hash) { + dynarec_log(LOG_DEBUG, "Invalidating alt block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size, hash, father->hash, father->sons_size, (void*)addr); + // Free father, it's now invalid! + FreeDynablock(father); + // start again... 
(will create a new block) + db = internalDBGetBlock(emu, addr, filladdr, create, NULL); + } else { + father->need_test = 0; + lockDB(); + protectDBnolock((uintptr_t)father->x64_addr, father->x64_size); + // fill back jumptable + addJumpTableIfDefault64(father->x64_addr, father->block); + for(int i=0; i<father->sons_size; ++i) + addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block); + unlockDB(); + } + } + return db; +} diff --git a/src/dynarec/dynablock_private.h b/src/dynarec/dynablock_private.h new file mode 100755 index 00000000..dd2ee4c0 --- /dev/null +++ b/src/dynarec/dynablock_private.h @@ -0,0 +1,36 @@ +#ifndef __DYNABLOCK_PRIVATE_H_ +#define __DYNABLOCK_PRIVATE_H_ + +typedef struct dynablocklist_s dynablocklist_t; + +typedef struct instsize_s { + unsigned int x64:4; + unsigned int nat:4; +} instsize_t; + +typedef struct dynablock_s { + dynablocklist_t* parent; + void* block; + int size; + void* x64_addr; + uintptr_t x64_size; + uint32_t hash; + uint8_t need_test; + uint8_t done; + uint8_t gone; + uint8_t dummy; + int isize; + dynablock_t** sons; // sons (kind-of dummy dynablock...) 
+ int sons_size; + dynablock_t* father; // set only in the case of a son + instsize_t* instsize; +} dynablock_t; + +typedef struct dynablocklist_s { + uintptr_t text; + int textsz; + int maxsz; // maxblock size (for this block or previous block) + dynablock_t** direct; // direct mapping (waste of space, so the array is created at first write) +} dynablocklist_t; + +#endif //__DYNABLOCK_PRIVATE_H_ \ No newline at end of file diff --git a/src/dynarec/dynarec.c b/src/dynarec/dynarec.c index c88cd61f..8769bc1e 100755 --- a/src/dynarec/dynarec.c +++ b/src/dynarec/dynarec.c @@ -22,10 +22,10 @@ #endif #ifdef DYNAREC -#ifdef ARM -void arm_prolog(x64emu_t* emu, void* addr) EXPORTDYN; -void arm_epilog() EXPORTDYN; -void arm_epilog_fast() EXPORTDYN; +#ifdef ARM64 +void arm64_prolog(x64emu_t* emu, void* addr) EXPORTDYN; +void arm64_epilog() EXPORTDYN; +void arm64_epilog_fast() EXPORTDYN; #endif #endif @@ -39,7 +39,7 @@ void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2) if(!addr) { x2-=8; // actual PC is 2 instructions ahead dynablock_t* db = FindDynablockFromNativeAddress(x2); - printf_log(LOG_NONE, "Warning, jumping to NULL address from %p (db=%p, x86addr=%p)\n", x2, db, db?(void*)getX86Address(db, (uintptr_t)x2):NULL); + printf_log(LOG_NONE, "Warning, jumping to NULL address from %p (db=%p, x64addr=%p)\n", x2, db, db?(void*)getX64Address(db, (uintptr_t)x2):NULL); } #endif dynablock_t* current = NULL; @@ -47,17 +47,17 @@ void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2) dynablock_t* block = DBGetBlock(emu, addr, 1, &current); if(!block) { // no block, let link table as is... 
leave linker //tableupdate(arm_linker, addr, table); - return arm_epilog; + return arm64_epilog; } if(!(jblock=block->block)) { // null block, but done: go to epilog, no linker here - return arm_epilog; + return arm64_epilog; } //dynablock_t *father = block->father?block->father:block; return jblock; @@ -82,7 +82,7 @@ void DynaCall(x64emu_t* emu, uintptr_t addr) } } #ifdef DYNAREC - if(!box86_dynarec) + if(!box64_dynarec) #endif EmuCall(emu, addr); #ifdef DYNAREC @@ -107,18 +107,18 @@ void DynaCall(x64emu_t* emu, uintptr_t addr) dynarec_log(LOG_DEBUG, "%04d|Calling Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu); Run(emu, 1); } else { - dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x86 instructions (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,block->father, emu); + dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x64 instructions (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,block->father, emu); CHECK_FLAGS(emu); // block is here, let's run it! 
- #ifdef ARM - arm_prolog(emu, block->block); + #ifdef ARM64 + arm64_prolog(emu, block->block); #endif } if(emu->fork) { int forktype = emu->fork; emu->quit = 0; emu->fork = 0; - emu = x86emu_fork(emu, forktype); + emu = x64emu_fork(emu, forktype); if(emu->type == EMUTYPE_MAIN) { ejb = GetJmpBuf(); ejb->emu = emu; @@ -170,7 +170,7 @@ int DynaRun(x64emu_t* emu) } } #ifdef DYNAREC - if(!box86_dynarec) + if(!box64_dynarec) #endif return Run(emu, 0); #ifdef DYNAREC @@ -186,17 +186,17 @@ int DynaRun(x64emu_t* emu) dynarec_log(LOG_DEBUG, "%04d|Running Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu); Run(emu, 1); } else { - dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x86 insts (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, block->father, emu); + dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x64 insts (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, block->father, emu); // block is here, let's run it! 
- #ifdef ARM - arm_prolog(emu, block->block); + #ifdef ARM64 + arm64_prolog(emu, block->block); #endif } if(emu->fork) { int forktype = emu->fork; emu->quit = 0; emu->fork = 0; - emu = x86emu_fork(emu, forktype); + emu = x64emu_fork(emu, forktype); if(emu->type == EMUTYPE_MAIN) { ejb = GetJmpBuf(); ejb->emu = emu; diff --git a/src/dynarec/dynarec_arm64.c b/src/dynarec/dynarec_arm64.c new file mode 100755 index 00000000..b3b0b06c --- /dev/null +++ b/src/dynarec/dynarec_arm64.c @@ -0,0 +1,449 @@ +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <errno.h> +#include <string.h> + +#include "debug.h" +#include "box64context.h" +#include "custommem.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "tools/bridge_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynablock.h" +#include "dynablock_private.h" +#include "dynarec_arm64.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" +#include "elfloader.h" + +void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name) { + uint8_t *ip = (uint8_t*)inst->addr; + if(ip[0]==0xcc && ip[1]=='S' && ip[2]=='C') { + uintptr_t a = *(uintptr_t*)(ip+3); + if(a==0) { + dynarec_log(LOG_NONE, "%s%p: Exit x64emu%s\n", (box64_dynarec_dump>1)?"\e[1m":"", (void*)ip, (box64_dynarec_dump>1)?"\e[m":""); + } else { + dynarec_log(LOG_NONE, "%s%p: Native call to %p%s\n", (box64_dynarec_dump>1)?"\e[1m":"", (void*)ip, (void*)a, (box64_dynarec_dump>1)?"\e[m":""); + } + } else { + if(dec) { + dynarec_log(LOG_NONE, "%s%p: %s", (box64_dynarec_dump>1)?"\e[1m":"", ip, DecodeX64Trace(dec, inst->addr)); + } else { + dynarec_log(LOG_NONE, "%s%p: ", (box64_dynarec_dump>1)?"\e[1m":"", ip); + for(int i=0; i<inst->size; ++i) { + dynarec_log(LOG_NONE, "%02X ", ip[i]); + } + dynarec_log(LOG_NONE, " %s", name); + } + // print Call function name if possible + 
if(ip[0]==0xE8 || ip[0]==0xE9) { // Call / Jmp + uintptr_t nextaddr = (uintptr_t)ip + 5 + *((int32_t*)(ip+1)); + printFunctionAddr(nextaddr, "=> "); + } else if(ip[0]==0xFF) { + if(ip[1]==0x25) { + uintptr_t nextaddr = (uintptr_t)ip + 6 + *((int32_t*)(ip+2)); + printFunctionAddr(nextaddr, "=> "); + } + } + // end of line and colors + dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); + } +} + +// Record addr in dyn->next unless it is already present; the array grows by 16 entries when full. +// If the array cannot be grown the address is dropped (with a warning) and the existing list is left intact. +void add_next(dynarec_arm_t *dyn, uintptr_t addr) { + // reject duplicates first, so the array is never grown for nothing + for(int i=0; i<dyn->next_sz; ++i) + if(dyn->next[i]==addr) + return; + if(dyn->next_sz == dyn->next_cap) { + // realloc into a temporary: on failure the old array is still valid and not leaked + uintptr_t* tmp = (uintptr_t*)realloc(dyn->next, (dyn->next_cap+16)*sizeof(uintptr_t)); + if(!tmp) { + dynarec_log(LOG_NONE, "Warning, cannot grow next list, address %p dropped\n", (void*)addr); + return; + } + dyn->next = tmp; + dyn->next_cap += 16; + } + dyn->next[dyn->next_sz++] = addr; +} +uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr) { + // get closest, but no addresses before + uintptr_t best = 0; + int i = 0; + while((i<dyn->next_sz) && (best!=addr)) { + if(dyn->next[i]<addr) { // remove the address, it's before current address + memmove(dyn->next+i, dyn->next+i+1, (dyn->next_sz-i-1)*sizeof(uintptr_t)); + --dyn->next_sz; + } else { + if((dyn->next[i]<best) || !best) + best = dyn->next[i]; + ++i; + } + } + return best; +} +#define PK(A) (*((uint8_t*)(addr+(A)))) +int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n) +{ + if(!n) + return 1; + if(PK(0)==0x90) + return is_nops(dyn, addr+1, n-1); + if(n>1 && PK(0)==0x66) // if opcode start with 0x66, and there is more after, then it *can* be a NOP + return is_nops(dyn, addr+1, n-1); + if(n>2 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x00) + return is_nops(dyn, addr+3, n-3); + if(n>2 && PK(0)==0x8d && PK(1)==0x76 && PK(2)==0x00) // lea esi, [esi] + return is_nops(dyn, addr+3, n-3); + if(n>3 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x40 && PK(3)==0x00) + return is_nops(dyn, addr+4, n-4); + if(n>3 && PK(0)==0x8d && PK(1)==0x74 && PK(2)==0x26 && PK(3)==0x00) + return is_nops(dyn, addr+4, n-4); + if(n>4 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x44 && PK(3)==0x00 && PK(4)==0x00) + return 
is_nops(dyn, addr+5, n-5); + if(n>5 && PK(0)==0x8d && PK(1)==0xb6 && PK(2)==0x00 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00) + return is_nops(dyn, addr+6, n-6); + if(n>6 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x80 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00) + return is_nops(dyn, addr+7, n-7); + if(n>6 && PK(0)==0x8d && PK(1)==0xb4 && PK(2)==0x26 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00) // lea esi, [esi+0] + return is_nops(dyn, addr+7, n-7); + if(n>7 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x84 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00 && PK(7)==0x00) + return is_nops(dyn, addr+8, n-8); + return 0; +} + +// return size of next instuciton, -1 is unknown +// not all instrction are setup +int next_instruction(dynarec_arm_t *dyn, uintptr_t addr) +{ + uint8_t opcode = PK(0); + uint8_t nextop; + switch (opcode) { + case 0x66: + opcode = PK(1); + switch(opcode) { + case 0x90: + return 2; + } + break; + case 0x81: + nextop = PK(1); + return fakeed(dyn, addr+2, 0, nextop)-addr + 4; + case 0x83: + nextop = PK(1); + return fakeed(dyn, addr+2, 0, nextop)-addr + 1; + case 0x84: + case 0x85: + case 0x88: + case 0x89: + case 0x8A: + case 0x8B: + case 0x8C: + case 0x8D: + case 0x8E: + case 0x8F: + nextop = PK(1); + return fakeed(dyn, addr+2, 0, nextop)-addr; + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5C: + case 0x5D: + case 0x5E: + case 0x5F: + case 0x90: + case 0x91: + case 0x92: + case 0x93: + case 0x94: + case 0x95: + case 0x96: + case 0x97: + case 0x98: + case 0x99: + case 0x9B: + case 0x9C: + case 0x9D: + case 0x9E: + case 0x9F: + return 1; + case 0xA0: + case 0xA1: + case 0xA2: + case 0xA3: + return 5; + case 0xB0: + case 0xB1: + case 0xB2: + case 0xB3: + case 0xB4: + case 0xB5: + case 0xB6: + case 0xB7: + return 2; + case 0xB8: + case 0xB9: + case 0xBA: + case 0xBB: + case 0xBC: + case 0xBD: + case 
0xBE: + case 0xBF: + return 5; + case 0xFF: + nextop = PK(1); + switch((nextop>>3)&7) { + case 0: // INC Ed + case 1: //DEC Ed + case 2: // CALL Ed + case 4: // JMP Ed + case 6: // Push Ed + return fakeed(dyn, addr+2, 0, nextop)-addr; + } + break; + default: + break; + } + return -1; +} +#undef PK + +int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n) +{ + int i = 0; + while(i<n) { + int j=next_instruction(dyn, addr+i); + if(j<=0) return 0; + i+=j; + } + return (i==n)?1:0; +} + +uint32_t needed_flags(dynarec_arm_t *dyn, int ninst, uint32_t setf, int recurse) +{ + if(recurse == 10) + return X_PEND; + if(ninst == dyn->size) + return X_PEND; // no more instructions, or too many jmp loop, stop + + uint32_t needed = dyn->insts[ninst].x64.use_flags; + if(needed) { + setf &= ~needed; + if(!setf) // all flags already used, no need to continue + return needed; + } + + if(!needed && !dyn->insts[ninst].x64.set_flags && !dyn->insts[ninst].x64.jmp_insts) { + int start = ninst; + int end = ninst; + while(end<dyn->size && !dyn->insts[end].x64.use_flags && !dyn->insts[end].x64.set_flags && !dyn->insts[end].x64.jmp_insts) + ++end; + needed = needed_flags(dyn, end, setf, recurse); + for(int i=start; i<end; ++i) + dyn->insts[i].x64.need_flags = needed; + return needed; + } + + if(dyn->insts[ninst].x64.set_flags && (dyn->insts[ninst].x64.state_flags!=SF_MAYSET)) { + if((setf & ~dyn->insts[ninst].x64.set_flags) == 0) + return needed; // all done, gives all the flags needed + setf |= dyn->insts[ninst].x64.set_flags; // add new flags to continue + } + + int jinst = dyn->insts[ninst].x64.jmp_insts; + if(dyn->insts[ninst].x64.jmp) { + dyn->insts[ninst].x64.need_flags = (jinst==-1)?X_PEND:needed_flags(dyn, jinst, setf, recurse+1); + if(dyn->insts[ninst].x64.use_flags) // conditionnal jump + dyn->insts[ninst].x64.need_flags |= needed_flags(dyn, ninst+1, setf, recurse); + } else + dyn->insts[ninst].x64.need_flags = needed_flags(dyn, ninst+1, setf, recurse); + 
if(dyn->insts[ninst].x64.state_flags==SF_MAYSET) + needed |= dyn->insts[ninst].x64.need_flags; + else + needed |= (dyn->insts[ninst].x64.need_flags & ~dyn->insts[ninst].x64.set_flags); + if(needed == (X_PEND|X_ALL)) + needed = X_ALL; + return needed; +} + +instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, int arm_size) +{ + // x64 instruction is <16 bytes + int toadd; + if(x64_size>arm_size) + toadd = 1 + x64_size/15; + else + toadd = 1 + arm_size/15; + if((*size)+toadd>(*cap)) { + *cap = (*size)+toadd; + insts = (instsize_t*)realloc(insts, (*cap)*sizeof(instsize_t)); + } + while(toadd) { + if(x64_size>15) + insts[*size].x64 = 15; + else + insts[*size].x64 = x64_size; + x64_size -= insts[*size].x64; + if(arm_size>15) + insts[*size].nat = 15; + else + insts[*size].nat = arm_size; + arm_size -= insts[*size].nat; + ++(*size); + --toadd; + } + return insts; +} + +void arm_pass0(dynarec_arm_t* dyn, uintptr_t addr); +void arm_pass1(dynarec_arm_t* dyn, uintptr_t addr); +void arm_pass2(dynarec_arm_t* dyn, uintptr_t addr); +void arm_pass3(dynarec_arm_t* dyn, uintptr_t addr); + +void* FillBlock(dynablock_t* block, uintptr_t addr) { + if(addr>=box64_nodynarec_start && addr<box64_nodynarec_end) + return NULL; + // init the helper + dynarec_arm_t helper = {0}; + helper.start = addr; + arm_pass0(&helper, addr); + if(!helper.size) { + dynarec_log(LOG_DEBUG, "Warning, null-sized dynarec block (%p)\n", (void*)addr); + block->done = 1; + free(helper.next); + return (void*)block; + } + helper.cap = helper.size+3; // needs epilog handling + helper.insts = (instruction_arm64_t*)calloc(helper.cap, sizeof(instruction_arm64_t)); + // pass 1, addresses, x64 jump addresses, flags + arm_pass1(&helper, addr); + // calculate barriers + uintptr_t start = helper.insts[0].x64.addr; + uintptr_t end = helper.insts[helper.size].x64.addr+helper.insts[helper.size].x64.size; + for(int i=0; i<helper.size; ++i) + if(helper.insts[i].x64.jmp) { + uintptr_t j = 
helper.insts[i].x64.jmp; + if(j<start || j>=end) + helper.insts[i].x64.jmp_insts = -1; + else { + // find jump address instruction + int k=-1; + for(int i2=0; i2<helper.size && k==-1; ++i2) { + if(helper.insts[i2].x64.addr==j) + k=i2; + } + if(k!=-1) // -1 if not found, mmm, probably wrong, exit anyway + helper.insts[k].x64.barrier = 1; + helper.insts[i].x64.jmp_insts = k; + } + } + for(int i=0; i<helper.size; ++i) + if(helper.insts[i].x64.set_flags && !helper.insts[i].x64.need_flags) { + helper.insts[i].x64.need_flags = needed_flags(&helper, i+1, helper.insts[i].x64.set_flags, 0); + if((helper.insts[i].x64.need_flags&X_PEND) && (helper.insts[i].x64.state_flags==SF_MAYSET)) + helper.insts[i].x64.need_flags = X_ALL; + } + + // pass 2, instruction size + arm_pass2(&helper, addr); + // ok, now allocate mapped memory, with executable flag on + int sz = helper.arm_size; + void* p = (void*)AllocDynarecMap(block, sz); + if(p==NULL) { + dynarec_log(LOG_DEBUG, "AllocDynarecMap(%p, %d) failed, cancelling block\n", block, sz); + free(helper.insts); + free(helper.next); + return NULL; + } + helper.block = p; + helper.arm_start = (uintptr_t)p; + if(helper.sons_size) { + helper.sons_x64 = (uintptr_t*)calloc(helper.sons_size, sizeof(uintptr_t)); + helper.sons_arm = (void**)calloc(helper.sons_size, sizeof(void*)); + } + // pass 3, emit (log emit arm opcode) + if(box64_dynarec_dump) { + dynarec_log(LOG_NONE, "%s%04d|Emitting %d bytes for %d x64 bytes", (box64_dynarec_dump>1)?"\e[01;36m":"", GetTID(), helper.arm_size, helper.isize); + printFunctionAddr(helper.start, " => "); + dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); + } + helper.arm_size = 0; + arm_pass3(&helper, addr); + if(sz!=helper.arm_size) { + printf_log(LOG_NONE, "BOX64: Warning, size difference in block between pass2 (%d) & pass3 (%d)!\n", sz, helper.arm_size); + uint8_t *dump = (uint8_t*)helper.start; + printf_log(LOG_NONE, "Dump of %d x64 opcodes:\n", helper.size); + for(int i=0; i<helper.size; 
++i) { + printf_log(LOG_NONE, "%p:", dump); + for(; dump<(uint8_t*)helper.insts[i+1].x64.addr; ++dump) + printf_log(LOG_NONE, " %02X", *dump); + printf_log(LOG_NONE, "\t%d -> %d\n", helper.insts[i].size2, helper.insts[i].size); + } + printf_log(LOG_NONE, " ------------\n"); + } + // all done... + __clear_cache(p, p+sz); // need to clear the cache before execution... + // keep size of instructions for signal handling + { + size_t cap = 1; + for(int i=0; i<helper.size; ++i) + cap += 1 + ((helper.insts[i].x64.size>helper.insts[i].size)?helper.insts[i].x64.size:helper.insts[i].size)/15; + size_t size = 0; + block->instsize = (instsize_t*)calloc(cap, sizeof(instsize_t)); + for(int i=0; i<helper.size; ++i) + block->instsize = addInst(block->instsize, &size, &cap, helper.insts[i].x64.size, helper.insts[i].size/4); + block->instsize = addInst(block->instsize, &size, &cap, 0, 0); // add a "end of block" mark, just in case + } + // ok, free the helper now + free(helper.insts); + free(helper.next); + block->size = sz; + block->isize = helper.size; + block->block = p; + block->need_test = 0; + //block->x64_addr = (void*)start; + block->x64_size = end-start; + if(box64_dynarec_largest<block->x64_size) + box64_dynarec_largest = block->x64_size; + block->hash = X31_hash_code(block->x64_addr, block->x64_size); + // fill sons if any + dynablock_t** sons = NULL; + int sons_size = 0; + if(helper.sons_size) { + sons = (dynablock_t**)calloc(helper.sons_size, sizeof(dynablock_t*)); + for (int i=0; i<helper.sons_size; ++i) { + int created = 1; + dynablock_t *son = AddNewDynablock(block->parent, helper.sons_x64[i], &created); + if(created) { // avoid breaking a working block! + son->block = helper.sons_arm[i]; + son->x64_addr = (void*)helper.sons_x64[i]; + son->x64_size = end-helper.sons_x64[i]; + if(!son->x64_size) {printf_log(LOG_NONE, "Warning, son with null x64 size! 
(@%p / ARM=%p)", son->x64_addr, son->block);} + son->father = block; + son->done = 1; + sons[sons_size++] = son; + if(!son->parent) + son->parent = block->parent; + } + } + if(sons_size) { + block->sons = sons; + block->sons_size = sons_size; + } else + free(sons); + } + free(helper.sons_x64); + free(helper.sons_arm); + block->done = 1; + return (void*)block; +} \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c new file mode 100755 index 00000000..784739ac --- /dev/null +++ b/src/dynarec/dynarec_arm64_functions.c @@ -0,0 +1,354 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <errno.h> +#include <string.h> +#include <math.h> +#include <signal.h> +#include <sys/types.h> +#include <unistd.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "tools/bridge_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "emu/x87emu_private.h" +#include "x64trace.h" +#include "signals.h" +#include "dynarec_arm64.h" +#include "dynarec_arm64_private.h" +#include "dynarec_arm64_functions.h" + +void arm_fstp(x64emu_t* emu, void* p) +{ + if(ST0.q!=STld(0).ref) + D2LD(&ST0.d, p); + else + memcpy(p, &STld(0).ld, 10); +} + +void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n) +{ + dynarec_log(LOG_DEBUG, "R%ld=0x%lx (%ld)\n", n, reg, reg); +} + +void arm_f2xm1(x64emu_t* emu) +{ + ST0.d = exp2(ST0.d) - 1.0; +} +void arm_fyl2x(x64emu_t* emu) +{ + ST(1).d = log2(ST0.d)*ST(1).d; +} +void arm_ftan(x64emu_t* emu) +{ + ST0.d = tan(ST0.d); +} +void arm_fpatan(x64emu_t* emu) +{ + ST1.d = atan2(ST1.d, ST0.d); +} +void arm_fxtract(x64emu_t* emu) +{ + int32_t tmp32s = (ST1.q&0x7ff0000000000000LL)>>52; + tmp32s -= 1023; + ST1.d /= exp2(tmp32s); + ST0.d = tmp32s; +} +void arm_fprem(x64emu_t* emu) +{ + int32_t tmp32s = ST0.d / 
ST1.d; + ST0.d -= ST1.d * tmp32s; + emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = (tmp32s&1); + emu->sw.f.F87_C3 = ((tmp32s>>1)&1); + emu->sw.f.F87_C1 = ((tmp32s>>2)&1); +} +void arm_fyl2xp1(x64emu_t* emu) +{ + ST(1).d = log2(ST0.d + 1.0)*ST(1).d; +} +void arm_fsincos(x64emu_t* emu) +{ + sincos(ST1.d, &ST1.d, &ST0.d); +} +void arm_frndint(x64emu_t* emu) +{ + ST0.d = fpu_round(emu, ST0.d); +} +void arm_fscale(x64emu_t* emu) +{ + ST0.d *= exp2(trunc(ST1.d)); +} +void arm_fsin(x64emu_t* emu) +{ + ST0.d = sin(ST0.d); +} +void arm_fcos(x64emu_t* emu) +{ + ST0.d = cos(ST0.d); +} + +void arm_fbld(x64emu_t* emu, uint8_t* ed) +{ + fpu_fbld(emu, ed); +} + +void arm_fild64(x64emu_t* emu, int64_t* ed) +{ + int64_t tmp; + memcpy(&tmp, ed, sizeof(tmp)); + ST0.d = tmp; + STll(0).ll = tmp; + STll(0).ref = ST0.q; +} + +void arm_fbstp(x64emu_t* emu, uint8_t* ed) +{ + fpu_fbst(emu, ed); +} + +void arm_fistp64(x64emu_t* emu, int64_t* ed) +{ + // used of memcpy to avoid aligments issues + if(STll(0).ref==ST(0).q) { + memcpy(ed, &STll(0).ll, sizeof(int64_t)); + } else { + int64_t tmp; + if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d)) + tmp = 0x8000000000000000LL; + else + tmp = fpu_round(emu, ST0.d); + memcpy(ed, &tmp, sizeof(tmp)); + } +} + +void arm_fistt64(x64emu_t* emu, int64_t* ed) +{ + // used of memcpy to avoid aligments issues + int64_t tmp = ST0.d; + memcpy(ed, &tmp, sizeof(tmp)); +} + +void arm_fld(x64emu_t* emu, uint8_t* ed) +{ + memcpy(&STld(0).ld, ed, 10); + LD2D(&STld(0), &ST(0).d); + STld(0).ref = ST0.q; +} + +void arm_ud(x64emu_t* emu) +{ + emit_signal(emu, SIGILL, (void*)R_RIP, 0); +} + +void arm_fsave(x64emu_t* emu, uint8_t* ed) +{ + fpu_savenv(emu, (char*)ed, 0); + + uint8_t* p = ed; + p += 28; + for (int i=0; i<8; ++i) { + LD2D(p, &ST(i).d); + p+=10; + } +} +void arm_frstor(x64emu_t* emu, uint8_t* ed) +{ + fpu_loadenv(emu, (char*)ed, 0); + + uint8_t* p = ed; + p += 28; + for (int 
i=0; i<8; ++i) { + D2LD(&ST(i).d, p); + p+=10; + } + +} + +void arm_fprem1(x64emu_t* emu) +{ + // simplified version + int32_t tmp32s = round(ST0.d / ST1.d); + ST0.d -= ST1.d*tmp32s; + emu->sw.f.F87_C2 = 0; + emu->sw.f.F87_C0 = (tmp32s&1); + emu->sw.f.F87_C3 = ((tmp32s>>1)&1); + emu->sw.f.F87_C1 = ((tmp32s>>2)&1); +} + + +// Get a FPU single scratch reg +int fpu_get_scratch_single(dynarec_arm_t* dyn) +{ + return dyn->fpu_scratch++; // return an Sx +} +// Get a FPU double scratch reg +int fpu_get_scratch_double(dynarec_arm_t* dyn) +{ + int i = (dyn->fpu_scratch+1)&(~1); + dyn->fpu_scratch = i+2; + return i/2; // return a Dx +} +// Get a FPU quad scratch reg +int fpu_get_scratch_quad(dynarec_arm_t* dyn) +{ + if(dyn->fpu_scratch>4) { + if(dyn->fpu_extra_qscratch) { + dynarec_log(LOG_NONE, "Warning, Extra QScratch slot taken and need another one!\n"); + } else + dyn->fpu_extra_qscratch = fpu_get_reg_quad(dyn); + return dyn->fpu_extra_qscratch; + } + int i = (dyn->fpu_scratch+3)&(~3); + dyn->fpu_scratch = i+4; + return i/2; // Return a Dx, not a Qx +} +// Reset scratch regs counter +void fpu_reset_scratch(dynarec_arm_t* dyn) +{ + dyn->fpu_scratch = 0; + if(dyn->fpu_extra_qscratch) { + fpu_free_reg_quad(dyn, dyn->fpu_extra_qscratch); + dyn->fpu_extra_qscratch = 0; + } +} +#define FPUFIRST 8 +// Get a FPU double reg +int fpu_get_reg_double(dynarec_arm_t* dyn) +{ + // TODO: check upper limit? + int i=0; + while (dyn->fpuused[i]) ++i; + dyn->fpuused[i] = 1; + return i+FPUFIRST; // return a Dx +} +// Free a FPU double reg +void fpu_free_reg_double(dynarec_arm_t* dyn, int reg) +{ + // TODO: check upper limit? 
+ int i=reg-FPUFIRST; + dyn->fpuused[i] = 0; +} +// Get a FPU quad reg +int fpu_get_reg_quad(dynarec_arm_t* dyn) +{ + int i=0; + while (dyn->fpuused[i] || dyn->fpuused[i+1]) i+=2; + dyn->fpuused[i] = dyn->fpuused[i+1] = 1; + return i+FPUFIRST; // Return a Dx, not a Qx +} +// Free a FPU quad reg +void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg) +{ + int i=reg-FPUFIRST; + dyn->fpuused[i] = dyn->fpuused[i+1] = 0; +} +// Reset fpu regs counter +void fpu_reset_reg(dynarec_arm_t* dyn) +{ + dyn->fpu_reg = 0; + for (int i=0; i<24; ++i) + dyn->fpuused[i]=0; +} + +#define F8 *(uint8_t*)(addr++) +#define F32 *(uint32_t*)(addr+=4, addr-4) +// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD +int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity) +{ + + uint32_t tested = (1<<parity)-1; + if((nextop&0xC0)==0xC0) + return 0; // direct register, no parity... + if(!(nextop&0xC0)) { + if((nextop&7)==4) { + uint8_t sib = F8; + int sib_reg = (sib>>3)&7; + if((sib&0x7)==5) { + uint32_t tmp = F32; + if (sib_reg!=4) { + // if XXXXXX+reg<<N then check parity of XXXXX and N should be enough + return ((tmp&tested)==0 && (sib>>6)>=parity)?1:0; + } else { + // just a constant... + return (tmp&tested)?0:1; + } + } else { + if(sib_reg==4 && parity<3) + return 0; // simple [reg] + // don't try [reg1 + reg2<<N], unless reg1 is ESP + return ((sib&0x7)==4 && (sib>>6)>=parity)?1:0; + } + } else if((nextop&7)==5) { + uint32_t tmp = F32; + return (tmp&tested)?0:1; + } else { + return 0; + } + } else { + return 0; //Form [reg1 + reg2<<N + XXXXXX] + } +} + +// Do the GETED, but don't emit anything... 
+uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop) +{ + if((nextop&0xC0)==0xC0) + return addr; + if(!(nextop&0xC0)) { + if((nextop&7)==4) { + uint8_t sib = F8; + if((sib&0x7)==5) { + addr+=4; + } + } else if((nextop&7)==5) { + addr+=4; + } + } else { + if((nextop&7)==4) { + ++addr; + } + if(nextop&0x80) { + addr+=4; + } else { + ++addr; + } + } + return addr; +} +#undef F8 +#undef F32 + +int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn) +{ +#define PK(a) *(uint8_t*)(addr+a) +#define PK64(a) *(uint64_t*)(addr+a) + + if(!addr) + return 0; + if(PK(0)==0xff && PK(1)==0x25) { // absolute jump, maybe the GOT + uintptr_t a1 = (PK64(2)); // need to add a check to see if the address is from the GOT ! + addr = *(uintptr_t*)a1; + } + if(addr<0x10000) // too low, that is suspicious + return 0; + onebridge_t *b = (onebridge_t*)(addr); + if(b->CC==0xCC && b->S=='S' && b->C=='C' && b->w!=(wrapper_t)0 && b->f!=(uintptr_t)PltResolver) { + // found ! 
+ if(retn) *retn = (b->C3==0xC2)?b->N:0; + if(calladdress) *calladdress = addr+1; + return 1; + } + return 0; +#undef PK32 +#undef PK +} + diff --git a/src/dynarec/dynarec_arm64_functions.h b/src/dynarec/dynarec_arm64_functions.h new file mode 100755 index 00000000..d932aa4d --- /dev/null +++ b/src/dynarec/dynarec_arm64_functions.h @@ -0,0 +1,61 @@ +#ifndef __DYNAREC_ARM_FUNCTIONS_H__ +#define __DYNAREC_ARM_FUNCTIONS_H__ + +typedef struct x64emu_s x64emu_t; + +void arm_fstp(x64emu_t* emu, void* p); + +void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n); + +void arm_f2xm1(x64emu_t* emu); +void arm_fyl2x(x64emu_t* emu); +void arm_ftan(x64emu_t* emu); +void arm_fpatan(x64emu_t* emu); +void arm_fxtract(x64emu_t* emu); +void arm_fprem(x64emu_t* emu); +void arm_fyl2xp1(x64emu_t* emu); +void arm_fsincos(x64emu_t* emu); +void arm_frndint(x64emu_t* emu); +void arm_fscale(x64emu_t* emu); +void arm_fsin(x64emu_t* emu); +void arm_fcos(x64emu_t* emu); +void arm_fbld(x64emu_t* emu, uint8_t* ed); +void arm_fild64(x64emu_t* emu, int64_t* ed); +void arm_fbstp(x64emu_t* emu, uint8_t* ed); +void arm_fistp64(x64emu_t* emu, int64_t* ed); +void arm_fistt64(x64emu_t* emu, int64_t* ed); +void arm_fld(x64emu_t* emu, uint8_t* ed); +void arm_fsave(x64emu_t* emu, uint8_t* ed); +void arm_frstor(x64emu_t* emu, uint8_t* ed); +void arm_fprem1(x64emu_t* emu); + +void arm_ud(x64emu_t* emu); + +// Get an FPU single scratch reg +int fpu_get_scratch_single(dynarec_arm_t* dyn); +// Get an FPU double scratch reg +int fpu_get_scratch_double(dynarec_arm_t* dyn); +// Get an FPU quad scratch reg +int fpu_get_scratch_quad(dynarec_arm_t* dyn); +// Reset scratch regs counter +void fpu_reset_scratch(dynarec_arm_t* dyn); +// Get an FPU double reg +int fpu_get_reg_double(dynarec_arm_t* dyn); +// Free a FPU double reg +void fpu_free_reg_double(dynarec_arm_t* dyn, int reg); +// Get an FPU quad reg +int fpu_get_reg_quad(dynarec_arm_t* dyn); +// Free a FPU quad reg +void fpu_free_reg_quad(dynarec_arm_t* 
dyn, int reg); +// Reset fpu regs counter +void fpu_reset_reg(dynarec_arm_t* dyn); + +// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD +int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity); +// Do the GETED, but don't emit anything... +uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop); + +// Is what pointed at addr a native call? And if yes, to what function? +int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn); + +#endif //__DYNAREC_ARM_FUNCTIONS_H__ \ No newline at end of file diff --git a/src/dynarec/dynarec_arm64_private.h b/src/dynarec/dynarec_arm64_private.h new file mode 100755 index 00000000..8101cc2b --- /dev/null +++ b/src/dynarec/dynarec_arm64_private.h @@ -0,0 +1,56 @@ +#ifndef __DYNAREC_ARM_PRIVATE_H_ +#define __DYNAREC_ARM_PRIVATE_H_ + +#include "dynarec_private.h" + +typedef struct x64emu_s x64emu_t; + +typedef struct instruction_arm_s { + instruction_x64_t x64; + uintptr_t address; // (start) address of the arm emited instruction + uintptr_t epilog; // epilog of current instruction (can be start of next, of barrier stuff) + int size; // size of the arm emited instruction + int size2; // size of the arm emited instrucion after pass2 + uintptr_t mark, mark2, mark3; + uintptr_t markf; + uintptr_t markseg; + uintptr_t marklock; + int pass2choice;// value for choices that are fixed on pass2 for pass3 + uintptr_t natcall; + int retn; +} instruction_arm64_t; + +typedef struct dynarec_arm_s { + instruction_arm64_t *insts; + int32_t size; + int32_t cap; + uintptr_t start; // start of the block + uint32_t isize; // size in byte of x64 instructions included + void* block; // memory pointer where next instruction is emited + uintptr_t arm_start; // start of the arm code + int arm_size; // size of emitted arm code + int state_flags;// actual state for on-demand flags + int x87cache[8];// cache status for the 
/* Description of one x64 instruction, as gathered while building a block. */
typedef struct instruction_x64_s {
    uintptr_t   addr;           /* address of the instruction */
    int32_t     size;           /* size of the instruction */
    int         barrier;        /* next instruction is a jump point, so no optimisation allowed */
    uintptr_t   jmp;            /* address to jump to, even if conditional (0 if none); not a relative offset */
    int         jmp_insts;      /* index of the instruction jumped to (-1 if out of the block) */
    uint32_t    use_flags;      /* 0 or a combination of X_?F */
    uint32_t    set_flags;      /* 0 or a combination of X_?F */
    uint32_t    need_flags;     /* calculated */
    int         state_flags;    /* one of the SF_XXX states */
} instruction_x64_t;