diff options
| -rw-r--r-- | docs/USAGE.md | 1 | ||||
| -rw-r--r-- | docs/box64.pod | 3 | ||||
| -rw-r--r-- | docs/gen/usage.json | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 18 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_64.c | 12 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_67.c | 12 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 10 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass2.h | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass3.h | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 3 | ||||
| -rw-r--r-- | src/dynarec/dynablock.c | 46 | ||||
| -rw-r--r-- | src/dynarec/dynablock_private.h | 7 | ||||
| -rw-r--r-- | src/dynarec/dynarec_arch.h | 3 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native.c | 41 | ||||
| -rw-r--r-- | src/dynarec/dynarec_native_pass.c | 5 | ||||
| -rw-r--r-- | src/dynarec/dynarec_private.h | 1 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_private.h | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 4 | ||||
| -rw-r--r-- | src/include/dynablock.h | 3 | ||||
| -rw-r--r-- | src/include/env.h | 2 | ||||
| -rw-r--r-- | src/libtools/signals.c | 79 | ||||
| -rw-r--r-- | src/tools/env.c | 2 | ||||
| -rw-r--r-- | src/wrapped/wrappedlibc.c | 14 | ||||
| -rw-r--r-- | system/box64.box64rc | 9 |
24 files changed, 257 insertions, 31 deletions
diff --git a/docs/USAGE.md b/docs/USAGE.md index 33507f44..2a2256b3 100644 --- a/docs/USAGE.md +++ b/docs/USAGE.md @@ -581,6 +581,7 @@ Optimize CALL/RET opcodes. * 0: Do not optimize CALL/RET, use jump table. [Default] * 1: Try to optimize CALL/RET, skipping the jump table when possible. + * 2: Try to optimize CALL/RET, skipping the jump table when possible, adding code to handle return to dirty/modified block. ### BOX64_DYNAREC_DF diff --git a/docs/box64.pod b/docs/box64.pod index 82e7153b..787e99cd 100644 --- a/docs/box64.pod +++ b/docs/box64.pod @@ -154,12 +154,13 @@ Detect MonoBleedingEdge and apply conservative settings. * 1 : Detect MonoBleedingEdge and apply BOX64_DYNAREC_BIGBLOCK=0 BOX64_DYNAREC_STRONGMEM=1 when detected. [Default] -=item B<BOX64_DYNAREC_CALLRET> =I<0|1> +=item B<BOX64_DYNAREC_CALLRET> =I<0|1|2> Optimize CALL/RET opcodes. * 0 : Do not optimize CALL/RET, use jump table. [Default] * 1 : Try to optimize CALL/RET, skipping the jump table when possible. + * 2 : Try to optimize CALL/RET, skipping the jump table when possible, adding code to handle return to dirty/modified block. 
=item B<BOX64_DYNAREC_DF> =I<0|1> diff --git a/docs/gen/usage.json b/docs/gen/usage.json index 63be95e0..d42c4dbf 100644 --- a/docs/gen/usage.json +++ b/docs/gen/usage.json @@ -239,6 +239,11 @@ "key": "1", "description": "Try to optimize CALL/RET, skipping the jump table when possible.", "default": false + }, + { + "key": "2", + "description": "Try to optimize CALL/RET, skipping the jump table when possible, adding code to handle return to dirty/modified block.", + "default": false } ] }, diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index 0b828704..0a22ac90 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -16,6 +16,7 @@ #include "bridge.h" #include "x64trace.h" #include "dynarec_native.h" +#include "../dynablock_private.h" #include "custommem.h" #include "alternate.h" @@ -3386,11 +3387,17 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // Push actual return address if(addr < (dyn->start+dyn->isize)) { // there is a next... - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; + if(BOX64DRENV(dynarec_callret)>1) + j64 = CALLRET_GETRET(); + else + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; ADR_S20(x4, j64); MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); } else { - j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0; + if(BOX64DRENV(dynarec_callret)>1) + j64 = CALLRET_GETRET(); + else + j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0; ADR_S20(x4, j64); MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); } @@ -3404,6 +3411,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin else j64 = addr+i32; jump_to_next(dyn, j64, 0, ninst, rex.is32bits); + if(BOX64DRENV(dynarec_callret)>1) CALLRET_RET(); if (BOX64DRENV(dynarec_callret) && addr >= (dyn->start + dyn->isize)) { // jumps out of current dynablock... 
MARK; @@ -4046,7 +4054,10 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // Push actual return address if(addr < (dyn->start+dyn->isize)) { // there is a next... - j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; + if(BOX64DRENV(dynarec_callret)>1) + j64 = CALLRET_GETRET(); + else + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; ADR_S20(x4, j64); MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); } else { @@ -4058,6 +4069,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } PUSH1z(xRIP); jump_to_next(dyn, 0, ed, ninst, rex.is32bits); + if(BOX64DRENV(dynarec_callret)>1) CALLRET_RET(); if (BOX64DRENV(dynarec_callret) && addr >= (dyn->start + dyn->isize)) { // jumps out of current dynablock... MARK; diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c index f7265aad..1f9edc20 100644 --- a/src/dynarec/arm64/dynarec_arm64_64.c +++ b/src/dynarec/arm64/dynarec_arm64_64.c @@ -14,6 +14,7 @@ #include "emu/x64run_private.h" #include "x64trace.h" #include "dynarec_native.h" +#include "../dynablock_private.h" #include "custommem.h" #include "arm64_printer.h" @@ -1602,11 +1603,17 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // Push actual return address if(addr < (dyn->start+dyn->isize)) { // there is a next... 
- j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; + if(BOX64DRENV(dynarec_callret)>1) + j64 = CALLRET_GETRET(); + else + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; ADR_S20(x4, j64); MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); } else { - j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0; + if(BOX64DRENV(dynarec_callret)>1) + j64 = CALLRET_GETRET(); + else + j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0; ADR_S20(x4, j64); MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); } @@ -1614,6 +1621,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } PUSH1z(xRIP); jump_to_next(dyn, 0, ed, ninst, rex.is32bits); + if(BOX64DRENV(dynarec_callret)>1) CALLRET_RET(); if (BOX64DRENV(dynarec_callret) && addr >= (dyn->start + dyn->isize)) { // jumps out of current dynablock... MARK; diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c index 1d95bb6b..3d9a7b35 100644 --- a/src/dynarec/arm64/dynarec_arm64_67.c +++ b/src/dynarec/arm64/dynarec_arm64_67.c @@ -13,6 +13,7 @@ #include "emu/x64run_private.h" #include "x64trace.h" #include "dynarec_native.h" +#include "../dynablock_private.h" #include "custommem.h" #include "arm64_printer.h" @@ -1708,11 +1709,17 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin // Push actual return address if(addr < (dyn->start+dyn->isize)) { // there is a next... 
- j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; + if(BOX64DRENV(dynarec_callret)>1) + j64 = CALLRET_GETRET(); + else + j64 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->native_size)):0; ADR_S20(x4, j64); MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); } else { - j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0; + if(BOX64DRENV(dynarec_callret)>1) + j64 = CALLRET_GETRET(); + else + j64 = (dyn->insts)?(GETMARK-(dyn->native_size)):0; ADR_S20(x4, j64); MESSAGE(LOG_NONE, "\tCALLRET set return to +%di\n", j64>>2); } @@ -1720,6 +1727,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin } PUSH1z(xRIP); jump_to_next(dyn, 0, ed, ninst, rex.is32bits); + if(BOX64DRENV(dynarec_callret)>1) CALLRET_RET(); if(BOX64DRENV(dynarec_callret) && addr >= (dyn->start + dyn->isize)) { // jumps out of current dynablock... MARK; diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 771e80f5..2e152dfb 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -988,6 +988,16 @@ #define IF_ALIGNED(A) if (!dyn->insts[ninst].unaligned) #endif +#ifndef CALLRET_RET +#define CALLRET_RET() NOP +#endif +#ifndef CALLRET_GETRET +#define CALLRET_GETRET() (dyn->callrets?(dyn->callrets[dyn->callret_size].offs-dyn->native_size):0) +#endif +#ifndef CALLRET_LOOP +#define CALLRET_LOOP() NOP +#endif + #define STORE_REG(A) STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) #define STP_REGS(A, B) STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) #define LDP_REGS(A, B) LDPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) diff --git a/src/dynarec/arm64/dynarec_arm64_pass2.h b/src/dynarec/arm64/dynarec_arm64_pass2.h index 0975908e..6fcb5b50 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass2.h +++ b/src/dynarec/arm64/dynarec_arm64_pass2.h @@ -17,3 +17,5 @@ #define INST_NAME(name) #define TABLE64(A, V) {Table64(dyn, (V), 2); 
EMIT(0);} #define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; Table64(dyn, v.q, 2); EMIT(0);} +#define CALLRET_RET() do {dyn->callrets[dyn->callret_size].type = 0; dyn->callrets[dyn->callret_size++].offs = dyn->native_size; EMIT(ARCH_NOP); } while(0) +#define CALLRET_LOOP() do {dyn->callrets[dyn->callret_size].type = 1; dyn->callrets[dyn->callret_size++].offs = dyn->native_size; EMIT(ARCH_NOP); } while(0) \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_pass3.h b/src/dynarec/arm64/dynarec_arm64_pass3.h index b274cabb..a0d79f30 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass3.h +++ b/src/dynarec/arm64/dynarec_arm64_pass3.h @@ -23,3 +23,5 @@ #define INST_NAME(name) inst_name_pass3(dyn, ninst, name, rex) #define TABLE64(A, V) {int val64offset = Table64(dyn, (V), 3); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); LDRx_literal(A, val64offset);} #define FTABLE64(A, V) {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q, 3); MESSAGE(LOG_DUMP, " FTable64: %g\n", v.d); VLDR64_literal(A, val64offset);} +#define CALLRET_RET() do {dyn->callrets[dyn->callret_size].type = 0; dyn->callrets[dyn->callret_size++].offs = dyn->native_size; EMIT(ARCH_NOP); } while(0) +#define CALLRET_LOOP() do {dyn->callrets[dyn->callret_size].type = 1; dyn->callrets[dyn->callret_size++].offs = dyn->native_size; EMIT(ARCH_NOP); } while(0) \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 740e7e9e..1c5f7008 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -47,6 +47,7 @@ typedef union sse_cache_s { uint8_t write:1; }; } sse_cache_t; +typedef struct callret_s callret_t; typedef struct neoncache_s { // Neon cache neon_cache_t neoncache[32]; @@ -157,6 +158,8 @@ typedef struct dynarec_arm_s { dynablock_t* dynablock; instsize_t* instsize; size_t insts_size; // size of the instruction size array (calculated) + int callret_size; // size of 
the array + callret_t* callrets; // array of callret return, with NOP / UDF depending if the block is clean or dirty uintptr_t forward; // address of the last end of code while testing forward uintptr_t forward_to; // address of the next jump to (to check if everything is ok) int32_t forward_size; // size at the forward point diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c index c12210b5..f0a8bbd9 100644 --- a/src/dynarec/dynablock.c +++ b/src/dynarec/dynablock.c @@ -50,15 +50,24 @@ dynablock_t* InvalidDynablock(dynablock_t* db, int need_lock) db->done = 0; db->gone = 1; uintptr_t db_size = db->x64_size; + #ifdef ARCH_NOP + if(db->callret_size) { + // NOTE(review): this writes ARCH_NOP although the comment originally said UDF; a gone block presumably should trap (ARCH_UDF) - confirm + for(int i=0; i<db->callret_size; ++i) + *(uint32_t*)(db->block+db->callrets[i].offs) = ARCH_NOP; + ClearCache(db->block, db->size); + } + #endif if(db_size && my_context) { uint32_t n = rb_get(my_context->db_sizes, db_size); if(n>1) rb_set(my_context->db_sizes, db_size, db_size+1, n-1); - else + else { rb_unset(my_context->db_sizes, db_size, db_size+1); - if(db_size == my_context->max_db_size) { - my_context->max_db_size = rb_get_righter(my_context->db_sizes); - dynarec_log(LOG_INFO, "BOX64 Dynarec: lower max_db=%d\n", my_context->max_db_size); + if(db_size == my_context->max_db_size) { + my_context->max_db_size = rb_get_righter(my_context->db_sizes); + dynarec_log(LOG_INFO, "BOX64 Dynarec: lower max_db=%d\n", my_context->max_db_size); + } } } if(need_lock) @@ -135,7 +144,14 @@ void MarkDynablock(dynablock_t* db) else db->previous = old; } + } + #ifdef ARCH_NOP + else if(db->callret_size) { + // mark all callrets to UDF + for(int i=0; i<db->callret_size; ++i) + *(uint32_t*)(db->block+db->callrets[i].offs) = ARCH_UDF; } + #endif } } @@ -290,8 +306,17 @@ dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, int is32bits) dynarec_log(LOG_DEBUG, "Validating block %p from %p:%p (hash:%X, always_test:%d) for %p\n", db, db->x64_addr, db->x64_addr+db->x64_size-1, 
db->hash, db->always_test, (void*)addr); if(db->always_test) protectDB((uintptr_t)db->x64_addr, db->x64_size); - else + else { + #ifdef ARCH_NOP + if(db->callret_size) { + // mark all callrets to NOP + for(int i=0; i<db->callret_size; ++i) + *(uint32_t*)(db->block+db->callrets[i].offs) = ARCH_NOP; + ClearCache(db->block, db->size); + } + #endif protectDBJumpTable((uintptr_t)db->x64_addr, db->x64_size, db->block, db->jmpnext); + } } if(!need_lock) mutex_unlock(&my_context->mutex_dyndump); @@ -326,8 +351,17 @@ dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr, } else { if(db->always_test) protectDB((uintptr_t)db->x64_addr, db->x64_size); - else + else { + #ifdef ARCH_NOP + if(db->callret_size) { + // mark all callrets to NOP + for(int i=0; i<db->callret_size; ++i) + *(uint32_t*)(db->block+db->callrets[i].offs) = ARCH_NOP; + ClearCache(db->block, db->size); + } + #endif protectDBJumpTable((uintptr_t)db->x64_addr, db->x64_size, db->block, db->jmpnext); + } } if(!need_lock) mutex_unlock(&my_context->mutex_dyndump); diff --git a/src/dynarec/dynablock_private.h b/src/dynarec/dynablock_private.h index b9e5f55d..8e174a63 100644 --- a/src/dynarec/dynablock_private.h +++ b/src/dynarec/dynablock_private.h @@ -6,6 +6,11 @@ typedef struct instsize_s { unsigned char nat:4; } instsize_t; +typedef struct callret_s { + uint32_t offs:31; + uint32_t type:1; +} callret_t; + typedef struct dynablock_s { void* block; // block-sizeof(void*) == self void* actual_block; // the actual start of the block (so block-sizeof(void*)) @@ -23,6 +28,8 @@ typedef struct dynablock_s { instsize_t* instsize; void* arch; // arch dependant per inst info (can be NULL) size_t arch_size; // size of of arch dependant infos + int callret_size; // size of the array + callret_t* callrets; // array of callret return, with NOP / UDF depending if the block is clean or dirty void* jmpnext; // a branch jmpnext code when block is marked } dynablock_t; diff --git 
a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index eaf64fd8..44d767b2 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -32,6 +32,9 @@ #define ARCH_UNALIGNED(A, B) arch_unaligned(A, B) extern uint32_t arm64_crc(void* p, uint32_t len); #define ARCH_CRC(A, B) if(arm64_crc32) return arm64_crc(A, B) + +#define ARCH_NOP 0b11010101000000110010000000011111 +#define ARCH_UDF 0xcafe #elif defined(LA64) #define instruction_native_t instruction_la64_t diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index 58aa4493..df6ab348 100644 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -504,6 +504,7 @@ static int static_jmps[MAX_INSTS+2]; static uintptr_t static_next[MAX_INSTS+2]; static uint64_t static_table64[(MAX_INSTS+3)/4]; static instruction_native_t static_insts[MAX_INSTS+2] = {0}; +static callret_t static_callrets[MAX_INSTS+2] = {0}; // TODO: ninst could be a uint16_t instead of an int, that could same some temp. 
memory void ClearCache(void* start, size_t len) @@ -653,7 +654,12 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit int i = helper.jmps[ii]; uintptr_t j = helper.insts[i].x64.jmp; helper.insts[i].x64.jmp_insts = -1; - if(j<start || j>=end || j==helper.insts[i].x64.addr) { + #ifndef ARCH_NOP + if(j<start || j>=end || j==helper.insts[i].x64.addr) + #else + if(j<start || j>=end) + #endif + { helper.insts[i].x64.need_after |= X_PEND; } else { // find jump address instruction @@ -691,8 +697,12 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit helper.insts[k].x64.barrier |= BARRIER_FULL; // special case, loop on itself with some nop in between if(k<i && !helper.insts[i].x64.has_next && is_nops(&helper, helper.insts[k].x64.addr, helper.insts[i].x64.addr-helper.insts[k].x64.addr)) { + #ifndef ARCH_NOP helper.always_test = 1; k = -1; + #else + helper.insts[k].x64.self_loop = 1; + #endif } helper.insts[i].x64.jmp_insts = k; } @@ -737,7 +747,11 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit for(int ii=0; ii<helper.jmp_sz && !helper.always_test; ++ii) { int i = helper.jmps[ii]; if(helper.insts[i].x64.alive && (helper.insts[i].x64.jmp==helper.insts[i].x64.addr)) { + #ifndef ARCH_NOP helper.always_test = 1; + #else + helper.insts[i].x64.self_loop = 1; + #endif } } // no need for next anymore @@ -753,6 +767,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit } // pass 2, instruction size + helper.callrets = static_callrets; native_pass2(&helper, addr, alternate, is32bits, inst_max); if(helper.abort) { if(BOX64DRENV(dynarec_dump) || BOX64ENV(dynarec_log))dynarec_log(LOG_NONE, "Abort dynablock on pass2\n"); @@ -778,15 +793,17 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit size_t insts_rsize = (helper.insts_size+2)*sizeof(instsize_t); insts_rsize = (insts_rsize+7)&~7; // round the size... 
size_t arch_size = ARCH_SIZE(&helper); + size_t callret_size = helper.callret_size*4; // ok, now allocate mapped memory, with executable flag on - size_t sz = sizeof(void*) + native_size + helper.table64size*sizeof(uint64_t) + 4*sizeof(void*) + insts_rsize + arch_size; - // dynablock_t* block (arm insts) table64 jmpnext code instsize arch + size_t sz = sizeof(void*) + native_size + helper.table64size*sizeof(uint64_t) + 4*sizeof(void*) + insts_rsize + arch_size + callret_size; + // dynablock_t* block (arm insts) table64 jmpnext code instsize arch callrets void* actual_p = (void*)AllocDynarecMap(sz); void* p = (void*)(((uintptr_t)actual_p) + sizeof(void*)); void* tablestart = p + native_size; void* next = tablestart + helper.table64size*sizeof(uint64_t); void* instsize = next + 4*sizeof(void*); void* arch = instsize + insts_rsize; + void* callrets = arch + arch_size; if(actual_p==NULL) { dynarec_log(LOG_INFO, "AllocDynarecMap(%p, %zu) failed, canceling block\n", block, sz); CancelBlock64(0); @@ -801,9 +818,13 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit *(dynablock_t**)actual_p = block; helper.table64cap = helper.table64size; helper.table64 = (uint64_t*)helper.tablestart; + helper.callrets = (callret_t*)callrets; + if(callret_size) + memcpy(helper.callrets, static_callrets, helper.callret_size*sizeof(callret_t)); + helper.callret_size = 0; // pass 3, emit (log emit native opcode) if(BOX64DRENV(dynarec_dump)) { - dynarec_log(LOG_NONE, "%s%04d|Emitting %zu bytes for %u %s bytes (native=%zu, table64=%zu, instsize=%zu, arch=%zu)", (BOX64DRENV(dynarec_dump)>1)?"\e[01;36m":"", GetTID(), helper.native_size, helper.isize, is32bits?"x86":"x64", native_size, helper.table64size*sizeof(uint64_t), insts_rsize, arch_size); + dynarec_log(LOG_NONE, "%s%04d|Emitting %zu bytes for %u %s bytes (native=%zu, table64=%zu, instsize=%zu, arch=%zu, callrets=%zu)", (BOX64DRENV(dynarec_dump)>1)?"\e[01;36m":"", GetTID(), helper.native_size, helper.isize, 
is32bits?"x86":"x64", native_size, helper.table64size*sizeof(uint64_t), insts_rsize, arch_size, callret_size); printFunctionAddr(helper.start, " => "); dynarec_log(LOG_NONE, "%s\n", (BOX64DRENV(dynarec_dump)>1)?"\e[m":""); } @@ -846,6 +867,8 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit block->arch = NULL; block->arch_size = arch_size; } + block->callret_size = helper.callret_size; + block->callrets = helper.callrets; *(dynablock_t**)next = block; *(void**)(next+3*sizeof(void*)) = native_next; CreateJmpNext(block->jmpnext, next+3*sizeof(void*)); @@ -889,7 +912,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit printf_log(LOG_NONE, "Warning, insts_size difference in block between pass2 (%zu) and pass3 (%zu), allocated: %zu\n", oldinstsize, helper.insts_size, insts_rsize/sizeof(instsize_t)); } if(!isprotectedDB(addr, end-addr)) { - dynarec_log(LOG_DEBUG, "Warning, block unprotected while being processed %p:%ld, marking as need_test\n", block->x64_addr, block->x64_size); + dynarec_log(LOG_INFO, "Warning, block unprotected while being processed %p:%ld, marking as need_test\n", block->x64_addr, block->x64_size); block->dirty = 1; //protectDB(addr, end-addr); } @@ -898,7 +921,13 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit block->always_test = 1; } if(block->always_test) { - dynarec_log(LOG_DEBUG, "Note: block marked as always dirty %p:%ld\n", block->x64_addr, block->x64_size); + dynarec_log(LOG_INFO, "Note: block marked as always dirty %p:%ld\n", block->x64_addr, block->x64_size); + #ifdef ARCH_NOP + // mark callrets to trigger SIGILL to check clean state + if(block->callret_size) + for(int i=0; i<block->callret_size; ++i) + *(uint32_t*)(block->block+block->callrets[i].offs) = ARCH_UDF; + #endif } current_helper = NULL; //block->done = 1; diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index bf30503c..99cb2449 100644 --- 
a/src/dynarec/dynarec_native_pass.c +++ b/src/dynarec/dynarec_native_pass.c @@ -14,6 +14,7 @@ #include "x64trace.h" #include "dynablock.h" #include "dynarec_native.h" +#include "dynablock_private.h" #include "custommem.h" #include "elfloader.h" #include "x64test.h" @@ -114,6 +115,10 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr, int alternate, int dyn->f.dfnone_here = 0; NEW_INST; MESSAGE(LOG_DUMP, "New Instruction %s:%p, native:%p\n", is32bits?"x86":"x64",(void*)addr, (void*)dyn->block); + #ifdef ARCH_NOP + if(dyn->insts[ninst].x64.alive && dyn->insts[ninst].x64.self_loop) + CALLRET_LOOP(); + #endif if(!ninst) { GOTEST(x1, x2); } diff --git a/src/dynarec/dynarec_private.h b/src/dynarec/dynarec_private.h index 111e7c74..9e3c55be 100644 --- a/src/dynarec/dynarec_private.h +++ b/src/dynarec/dynarec_private.h @@ -41,6 +41,7 @@ typedef struct instruction_x64_s { uint8_t has_next:1; // does this opcode can continue to the next? uint8_t has_callret:1; // this instruction have an optimized call setup uint8_t alive:1; // this opcode gets executed (0 if dead code in that block) + uint8_t self_loop:1; // this is a landing address for a self-loop (loop on itself with no exit) uint8_t barrier; // next instruction is a jump point, so no optim allowed uint8_t state_flags;// One of SF_XXX state uint8_t use_flags; // 0 or combination of X_?F diff --git a/src/dynarec/la64/dynarec_la64_private.h b/src/dynarec/la64/dynarec_la64_private.h index 7c40ca27..0246007e 100644 --- a/src/dynarec/la64/dynarec_la64_private.h +++ b/src/dynarec/la64/dynarec_la64_private.h @@ -68,6 +68,8 @@ typedef struct flagcache_s { uint8_t dfnone_here;// defered flags is cleared in this opcode } flagcache_t; +typedef struct callret_s callret_t; + typedef struct instruction_la64_s { instruction_x64_t x64; uintptr_t address; // (start) address of the arm emitted instruction @@ -134,6 +136,8 @@ typedef struct dynarec_la64_s { dynablock_t* dynablock; instsize_t* instsize; size_t insts_size; // 
size of the instruction size array (calculated) + int callret_size; // size of the array + callret_t* callrets; // array of callret return, with NOP / UDF depending if the block is clean or dirty uintptr_t forward; // address of the last end of code while testing forward uintptr_t forward_to; // address of the next jump to (to check if everything is ok) int32_t forward_size; // size at the forward point diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 99281462..7519d99c 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -100,6 +100,8 @@ typedef struct flagcache_s { uint8_t dfnone_here;// defered flags is cleared in this opcode } flagcache_t; +typedef struct callret_s callret_t; + typedef struct instruction_rv64_s { instruction_x64_t x64; uintptr_t address; // (start) address of the riscv emitted instruction @@ -169,6 +171,8 @@ typedef struct dynarec_rv64_s { dynablock_t* dynablock; instsize_t* instsize; size_t insts_size; // size of the instruction size array (calculated) + int callret_size; // size of the array + callret_t* callrets; // array of callret return, with NOP / UDF depending if the block is clean or dirty uint8_t smwrite; // for strongmem model emulation uintptr_t forward; // address of the last end of code while testing forward uintptr_t forward_to; // address of the next jump to (to check if everything is ok) diff --git a/src/include/dynablock.h b/src/include/dynablock.h index 757ca4ae..b9aeddc0 100644 --- a/src/include/dynablock.h +++ b/src/include/dynablock.h @@ -21,4 +21,7 @@ dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr, // for use in signal handler void cancelFillBlock(void); +// clear instruction cache on a range +void ClearCache(void* start, size_t len); + #endif //__DYNABLOCK_H_ \ No newline at end of file diff --git a/src/include/env.h b/src/include/env.h index d820931d..3398f3b0 100644 --- 
a/src/include/env.h +++ b/src/include/env.h @@ -38,7 +38,7 @@ extern char* ftrace_name; BOOLEAN(BOX64_DYNAREC_ALIGNED_ATOMICS, dynarec_aligned_atomics, 0) \ INTEGER(BOX64_DYNAREC_BIGBLOCK, dynarec_bigblock, 2, 0, 3) \ BOOLEAN(BOX64_DYNAREC_BLEEDING_EDGE, dynarec_bleeding_edge, 1) \ - BOOLEAN(BOX64_DYNAREC_CALLRET, dynarec_callret, 0) \ + INTEGER(BOX64_DYNAREC_CALLRET, dynarec_callret, 0, 0, 2) \ BOOLEAN(BOX64_DYNAREC_DF, dynarec_df, 1) \ INTEGER(BOX64_DYNAREC_DIRTY, dynarec_dirty, 0, 0, 2) \ BOOLEAN(BOX64_DYNAREC_DIV0, dynarec_div0, 0) \ diff --git a/src/libtools/signals.c b/src/libtools/signals.c index 21e79cab..99ddd41e 100644 --- a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -1616,6 +1616,80 @@ void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx) return; } } + #ifdef ARCH_NOP + if(sig==SIGILL) { + db = FindDynablockFromNativeAddress(pc); + if(db) + x64pc = getX64Address(db, (uintptr_t)pc); // this will be incorrect in the case of the callret! + db_searched = 1; + if(db && db->callret_size) { + int is_callrets = 0; + int type_callret = 0; + for(int i=0; i<db->callret_size && !is_callrets; ++i) + if(pc==(db->block+db->callrets[i].offs)) { + is_callrets = 1; + type_callret = db->callrets[i].type; + } + if(is_callrets) { + if(!type_callret) { + // adjust x64pc for "ret" type + #ifdef __aarch64__ + x64pc = p->uc_mcontext.regs[27]; + #elif defined(LA64) + x64pc = p->uc_mcontext.__gregs[20]; + #elif defined(RV64) + x64pc = p->uc_mcontext.__gregs[22]; + #endif + } + // check if block is still valid + int is_hotpage = checkInHotPage(x64pc); + uint32_t hash = (db->gone || is_hotpage)?0:X31_hash_code(db->x64_addr, db->x64_size); + if(!db->gone && !is_hotpage && hash==db->hash) { + dynarec_log(LOG_INFO, "Dynablock (%p, x64addr=%p, always_test=%d) is clean, %s continuing at %p (%p)!\n", db, db->x64_addr, db->always_test, type_callret?"self-loop":"ret from callret", (void*)x64pc, (void*)addr); + // it's good! 
go next opcode + #ifdef __aarch64__ + p->uc_mcontext.pc+=4; + #elif defined(LA64) + p->uc_mcontext.__pc+=4; + #elif defined(RV64) + p->uc_mcontext.__gregs[REG_PC]+=4; + #endif + if(db->always_test) + protectDB((uintptr_t)db->x64_addr, 1); + else { + if(db->callret_size) { + // mark all callrets to NOP + for(int i=0; i<db->callret_size; ++i) + *(uint32_t*)(db->block+db->callrets[i].offs) = ARCH_NOP; + ClearCache(db->block, db->size); + } + protectDBJumpTable((uintptr_t)db->x64_addr, db->x64_size, db->block, db->jmpnext); + } + return; + } else { + // dynablock got dirty! need to get out of it!!! + if(emu->jmpbuf) { + copyUCTXreg2Emu(emu, p, x64pc); + // only copy as it's a return address, so there is just the "epilog" to mimic here on "ret" type. "loop" type need everything + if(type_callret) { + adjustregs(emu); + if(db && db->arch_size) + ARCH_ADJUST(db, emu, p, x64pc); + } + dynarec_log(LOG_INFO, "Dynablock (%p, x64addr=%p) %s, getting out at %s %p (%p)!\n", db, db->x64_addr, is_hotpage?"in HotPage":"dirty",(void*)R_RIP, type_callret?"self-loop":"ret from callret", (void*)addr); + emu->test.clean = 0; + #ifdef ANDROID + siglongjmp(*(JUMPBUFF*)emu->jmpbuf, 2); + #else + siglongjmp(emu->jmpbuf, 2); + #endif + } + dynarec_log(LOG_INFO, "Warning, Dirty %s (%p for db %p/%p) detected, but jmpbuffer not ready!\n", type_callret?"self-loop":"ret from callret", (void*)addr, db, (void*)db->x64_addr); + } + } + } + } + #endif int Locks = unlockMutex(); uint32_t prot = getProtection((uintptr_t)addr); #ifdef BAD_SIGNAL @@ -1737,7 +1811,10 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "%04d|Repeated SIGSEGV with Access error on % glitch_pc = NULL; glitch_addr = NULL; glitch_prot = 0; - } + relockMutex(Locks); + unlock_signal(); + return; // try again + } if(addr && pc && ((prot&(PROT_READ|PROT_WRITE))==(PROT_READ|PROT_WRITE))) { static void* glitch2_pc = NULL; static void* glitch2_addr = NULL; diff --git a/src/tools/env.c b/src/tools/env.c index f1390fbe..00e116f3 100644 --- 
a/src/tools/env.c +++ b/src/tools/env.c @@ -627,6 +627,7 @@ void RecordEnvMappings(uintptr_t addr, size_t length, int fd) if (k != kh_end(box64env_entries)) mapping->env = &kh_value(box64env_entries, k); } + dynarec_log(LOG_INFO, "Mapping %s (%s) in %p-%p\n", fullname, lowercase_filename, (void*)addr, (void*)(addr+length)); } else mapping = kh_value(mapping_entries, k); @@ -659,6 +660,7 @@ void RemoveMapping(uintptr_t addr, size_t length) start = end; } while(end!=UINTPTR_MAX); // no occurence found, delete mapping + dynarec_log(LOG_INFO, "Delete Mapping %s (%s) in %p(%p)-%p\n", mapping->fullname, mapping->filename, (void*)addr, (void*)mapping->start, (void*)(addr+length)); khint_t k = kh_get(mapping_entry, mapping_entries, mapping->filename); if(k!=kh_end(mapping_entries)) kh_del(mapping_entry, mapping_entries, k); diff --git a/src/wrapped/wrappedlibc.c b/src/wrapped/wrappedlibc.c index 7eb5aa9e..20ced661 100644 --- a/src/wrapped/wrappedlibc.c +++ b/src/wrapped/wrappedlibc.c @@ -3029,13 +3029,6 @@ EXPORT void* my_mmap64(x64emu_t* emu, void *addr, size_t length, int prot, int f } #endif if(ret!=MAP_FAILED) { - if((flags&MAP_SHARED) && (fd>0)) { - uint32_t flags = fcntl(fd, F_GETFL); - if((flags&O_ACCMODE)==O_RDWR) { - if((BOX64ENV(log)>=LOG_DEBUG || BOX64ENV(dynarec_log)>=LOG_DEBUG)) {printf_log(LOG_NONE, "Note: Marking the region (%p-%p prot=%x) as NEVERCLEAN because fd have O_RDWR attribute\n", ret, ret+length, prot);} - prot |= PROT_NEVERCLEAN; - } - } if(emu && !(flags&MAP_ANONYMOUS) && (fd>0)) { DetectUnityPlayer(fd); // the last_mmap will allow mmap created by wine, even those that have hole, to be fully tracked as one single mmap @@ -3044,6 +3037,13 @@ EXPORT void* my_mmap64(x64emu_t* emu, void *addr, size_t length, int prot, int f else RecordEnvMappings((uintptr_t)ret, length, fd); } + if((flags&MAP_SHARED) && (fd>0)) { + uint32_t flags = fcntl(fd, F_GETFL); + if((flags&O_ACCMODE)==O_RDWR) { + if((BOX64ENV(log)>=LOG_DEBUG || 
BOX64ENV(dynarec_log)>=LOG_DEBUG)) {printf_log(LOG_NONE, "Note: Marking the region (%p-%p prot=%x) as NEVERCLEAN because fd have O_RDWR attribute\n", ret, ret+length, prot);} + prot |= PROT_NEVERCLEAN; + } + } // hack to capture full size of the mmap done by wine if(emu && (fd==-1) && (flags==(MAP_PRIVATE|MAP_ANON))) { last_mmap_addr = ret; diff --git a/system/box64.box64rc b/system/box64.box64rc index 53f49a4a..3d47fdeb 100644 --- a/system/box64.box64rc +++ b/system/box64.box64rc @@ -351,7 +351,7 @@ BOX64_DYNAREC_ALIGNED_ATOMICS=1 [Blacksad.exe] BOX64_DYNAREC_STRONGMEM=1 BOX64_DYNAREC_BIGBLOCK=3 -BOX64_DYNAREC_CALLRET=0 +BOX64_DYNAREC_CALLRET=2 [Battle.net.exe] BOX64_DYNAREC_BIGBLOCK=0 @@ -465,7 +465,7 @@ BOX64_DYNAREC_BIGBLOCK=0 BOX64_DYNAREC_SAFEFLAGS=2 BOX64_DYNAREC_STRONGMEM=1 BOX64_DYNAREC_BIGBLOCK=3 -BOX64_DYNAREC_CALLRET=0 +BOX64_DYNAREC_CALLRET=2 BOX64_SSE_FLUSHTO0=1 BOX64_DYNAREC_DIRTY=1 @@ -734,6 +734,11 @@ BOX64_DYNAREC_BIGBLOCK=3 BOX64_DYNAREC_CALLRET=1 BOX64_DYNAREC_SAFEFLAGS=0 +[Trials of Innocence.exe] +BOX64_DYNAREC_BIGBLOCK=3 +BOX64_DYNAREC_CALLRET=2 +BOX64_DYNAREC_DIRTY=0 + [TT2.exe] BOX64_DYNAREC_STRONGMEM=1 BOX64_DYNAREC_BIGBLOCK=3 |