diff options
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-01-11 12:38:23 +0100 |
|---|---|---|
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-01-11 12:38:35 +0100 |
| commit | 35a68282097ec90c3dcd473402e234aa8a6ba4a9 (patch) | |
| tree | b3b3bcbd20ddaac7af0f9b5b730402cb4a20537c /src | |
| parent | 2b66675a08f7f56eb5840330247484f56cdf685a (diff) | |
| download | box64-35a68282097ec90c3dcd473402e234aa8a6ba4a9.tar.gz box64-35a68282097ec90c3dcd473402e234aa8a6ba4a9.zip | |
[ARM64_DYNAREC] Better handling of unaligned access to device memory, with regeneration of code
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 48 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 16 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_arch.c | 29 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_arch.h | 5 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 3 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass0.h | 2 | ||||
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 1 | ||||
| -rw-r--r-- | src/dynarec/dynarec_arch.h | 3 | ||||
| -rw-r--r-- | src/dynarec/dynarec_helper.h | 2 | ||||
| -rw-r--r-- | src/libtools/signals.c | 110 |
11 files changed, 178 insertions, 43 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h index be7bf0c7..8d604f20 100644 --- a/src/dynarec/arm64/arm64_emitter.h +++ b/src/dynarec/arm64/arm64_emitter.h @@ -927,10 +927,12 @@ int convert_bitmask(uint64_t bitmask); #define VLD64(A, B, C) if(unscaled) {VLDR64_I9(A, B, C);} else {VLDR64_U12(A, B, C);} #define VLD32(A, B, C) if(unscaled) {VLDR32_I9(A, B, C);} else {VLDR32_U12(A, B, C);} #define VLD16(A, B, C) if(unscaled) {VLDR16_I9(A, B, C);} else {VLDR16_U12(A, B, C);} +#define VLD8(A, B, C) if(unscaled) {VLDR8_I9(A, B, C);} else {VLDR8_U12(A, B, C);} #define VST128(A, B, C) if(unscaled) {VSTR128_I9(A, B, C);} else {VSTR128_U12(A, B, C);} #define VST64(A, B, C) if(unscaled) {VSTR64_I9(A, B, C);} else {VSTR64_U12(A, B, C);} #define VST32(A, B, C) if(unscaled) {VSTR32_I9(A, B, C);} else {VSTR32_U12(A, B, C);} #define VST16(A, B, C) if(unscaled) {VSTR16_I9(A, B, C);} else {VSTR16_U12(A, B, C);} +#define VST8(A, B, C) if(unscaled) {VSTR8_I9(A, B, C);} else {VSTR8_U12(A, B, C);} #define VMEMW_gen(size, opc, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt)) #define VLDR64_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b01, Rn, Rt)) diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c index c7766298..e387b8bc 100644 --- a/src/dynarec/arm64/dynarec_arm64_00.c +++ b/src/dynarec/arm64/dynarec_arm64_00.c @@ -1412,8 +1412,21 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin if(MODREG) { // reg <= reg MOVxw_REG(TO_NAT((nextop & 7) + (rex.b << 3)), gd); } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff << (2 + rex.w), (1 << (2 + rex.w)) - 1, rex, &lock, 0, 0); - STxw(gd, ed, fixedaddress); + IF_UNALIGNED(ip) { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, &lock, 0, 0); + if(gd==wback) { + 
MOVx_REG(x2, wback); + wback = x2; + } + for(int i=0; i<(1<<(2+rex.w)); ++i) { + STURB_I9(gd, wback, i); + RORxw(gd, gd, 8); + } + // gd restored after that + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff << (2 + rex.w), (1 << (2 + rex.w)) - 1, rex, &lock, 0, 0); + STxw(gd, ed, fixedaddress); + } SMWRITELOCK(lock); } break; @@ -2376,14 +2389,29 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin ed = TO_NAT((nextop & 7) + (rex.b << 3)); MOV64xw(ed, i64); } else { // mem <= i32 - addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 4); - i64 = F32S; - if(i64) { - MOV64xw(x3, i64); - ed = x3; - } else - ed = xZR; - STxw(ed, wback, fixedaddress); + IF_UNALIGNED(ip) { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, &lock, 0, 4); + i64 = F32S; + if(i64) { + MOV64xw(x3, i64); + ed = x3; + } else + ed = xZR; + for(int i=0; i<(1<<(2+rex.w)); ++i) { + STURB_I9(ed, wback, i); + if(ed!=xZR) + RORxw(ed, ed, 8); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 4); + i64 = F32S; + if(i64) { + MOV64xw(x3, i64); + ed = x3; + } else + ed = xZR; + STxw(ed, wback, fixedaddress); + } SMWRITELOCK(lock); } break; diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c index febb75a3..9bca36f7 100644 --- a/src/dynarec/arm64/dynarec_arm64_0f.c +++ b/src/dynarec/arm64/dynarec_arm64_0f.c @@ -260,8 +260,20 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin v1 = sse_get_reg_empty(dyn, ninst, x1, ed); VMOVQ(v1, v0); } else { - addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); - VST128(v0, ed, fixedaddress); + IF_UNALIGNED(ip) { + addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, 
NULL, 0, 0, rex, NULL, 0, 0); + if(wback!=x1) { + MOVx_REG(x1, wback); + wback = x1; + } + for(int i=0; i<16; ++i) { + VST1_8(v0, i, wback); + ADDx_U12(wback, wback, 1); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0); + VST128(v0, ed, fixedaddress); + } SMWRITE2(); } break; diff --git a/src/dynarec/arm64/dynarec_arm64_arch.c b/src/dynarec/arm64/dynarec_arm64_arch.c index fae233ce..43cd68da 100644 --- a/src/dynarec/arm64/dynarec_arm64_arch.c +++ b/src/dynarec/arm64/dynarec_arm64_arch.c @@ -21,6 +21,7 @@ typedef struct arch_build_s uint8_t mmx:1; uint8_t sse:1; uint8_t ymm:1; + uint8_t unaligned; arch_flags_t flags_; arch_x87_t x87_; arch_mmx_t mmx_; @@ -48,7 +49,8 @@ static int arch_build(dynarec_arm_t* dyn, int ninst, arch_build_t* arch) arch->sse = 1; arch->sse_.sse |= 1<<i; } - return arch->flags + arch->x87 + arch->mmx + arch->sse + arch->ymm; + arch->unaligned = dyn->insts[ninst].unaligned; + return arch->flags + arch->x87 + arch->mmx + arch->sse + arch->ymm + arch->unaligned; } size_t get_size_arch(dynarec_arm_t* dyn) @@ -62,7 +64,7 @@ size_t get_size_arch(dynarec_arm_t* dyn) if(!dyn->size) return 0; for(int i=0; i<dyn->size; ++i) { last = arch_build(dyn, i, &build); - if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<11)-1)) && i) { + if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<10)-1)) && i) { // same sequence, increment ++seq; } else { @@ -89,6 +91,7 @@ static void build_next(arch_arch_t* arch, arch_build_t* build) arch->mmx = build->mmx; arch->sse = build->sse; arch->ymm = build->ymm; + arch->unaligned = build->unaligned; arch->seq = 0; void* p = ((void*)arch)+sizeof(arch_arch_t); #define GO(A) \ @@ -126,7 +129,7 @@ void populate_arch(dynarec_arm_t* dyn, void* p) int seq = 0; for(int i=0; i<dyn->size; ++i) { arch_build(dyn, i, &build); - if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<11)-1)) && i) { + if((!memcmp(&build, 
&previous, sizeof(arch_build_t))) && (seq<((1<<10)-1)) && i) { // same sequence, increment seq++; arch->seq = seq; @@ -230,4 +233,24 @@ void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t x64pc) } } dynarec_log(LOG_INFO, "\n"); +} + +int arch_unaligned(dynablock_t* db, uintptr_t x64pc) +{ + if(!db->arch_size || !db->arch) + return 0; + int ninst = getX64AddressInst(db, x64pc); + if(ninst<0) { + return 0; + } + // look for state at ninst + arch_arch_t* arch = db->arch; + arch_arch_t* next = arch; + int i = -1; + while(i<ninst) { + arch = next; + i += 1+arch->seq; + next = (arch_arch_t*)((uintptr_t)next + sizeof_arch(arch)); + } + return arch->unaligned; } \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_arch.h b/src/dynarec/arm64/dynarec_arm64_arch.h index 68f65d8a..84392325 100644 --- a/src/dynarec/arm64/dynarec_arm64_arch.h +++ b/src/dynarec/arm64/dynarec_arm64_arch.h @@ -50,7 +50,8 @@ typedef struct arch_arch_s uint16_t mmx:1; uint16_t sse:1; uint16_t ymm:1; - uint16_t seq:11; // how many instruction on the same values + uint16_t unaligned:1; + uint16_t seq:10; // how many instruction on the same values } arch_arch_t; // get size of arch specific info (can be 0) @@ -59,4 +60,6 @@ size_t get_size_arch(dynarec_arm_t* dyn); void populate_arch(dynarec_arm_t* dyn, void* p); //adjust flags and more void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t native_addr); +// get if instruction can be regenerated for unaligned access +int arch_unaligned(dynablock_t* db, uintptr_t x64pc); #endif // __DYNAREC_ARM_ARCH_H__ diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index 934f64a5..266005f2 100644 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -977,6 +977,9 @@ } \ SET_DFNONE(s1); \ +#ifndef IF_UNALIGNED +#define IF_UNALIGNED(A) if(is_addr_unaligned(A)) +#endif #define STORE_REG(A) STRx_U12(x##A, xEmu, 
offsetof(x64emu_t, regs[_##A])) #define STP_REGS(A, B) STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A])) diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h index e9d7bb7e..0f096482 100644 --- a/src/dynarec/arm64/dynarec_arm64_pass0.h +++ b/src/dynarec/arm64/dynarec_arm64_pass0.h @@ -66,3 +66,5 @@ #define IFNATIVE_BEFORE(A) if(mark_natflag(dyn, ninst, A, 1)) #define INVERT_CARRY(A) dyn->insts[ninst].invert_carry = 1 #define INVERT_CARRY_BEFORE(A) dyn->insts[ninst].invert_carry_before = 1 +// mark opcode as "unaligned" possible only if the current address is not marked as already unaligned +#define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=(is_addr_unaligned(A)?0:1))) \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h index 8f10b0f5..b88a9ad1 100644 --- a/src/dynarec/arm64/dynarec_arm64_private.h +++ b/src/dynarec/arm64/dynarec_arm64_private.h @@ -122,6 +122,7 @@ typedef struct instruction_arm64_s { unsigned normal_carry_before:1; unsigned invert_carry:1; // this opcode force an inverted carry unsigned df_notneeded:1; + unsigned unaligned:1; // this opcode can be re-generated for unaligned special case flagcache_t f_exit; // flags status at end of instruction neoncache_t n; // neoncache at end of instruction (but before poping) flagcache_t f_entry; // flags status before the instruction begin diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index 1b619e1a..85ec3ef4 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -29,6 +29,7 @@ #define ARCH_FILL(A, B) populate_arch(A, B) #define ARCH_ADJUST(A, B, C, D) adjust_arch(A, B, C, D) #define STOP_NATIVE_FLAGS(A, B) A->insts[B].nat_flags_op = NAT_FLAG_OP_UNUSABLE +#define ARCH_UNALIGNED(A, B) arch_unaligned(A, B) #elif defined(LA64) #define instruction_native_t instruction_la64_t @@ -55,6 +56,7 @@ #define ARCH_FILL(A, B) {} #define 
ARCH_ADJUST(A, B, C, D) {} #define STOP_NATIVE_FLAGS(A, B) {} +#define ARCH_UNALIGNED(A, B) 0 #elif defined(RV64) #define instruction_native_t instruction_rv64_t @@ -83,6 +85,7 @@ #define ARCH_FILL(A, B) {} #define ARCH_ADJUST(A, B, C, D) {} #define STOP_NATIVE_FLAGS(A, B) {} +#define ARCH_UNALIGNED(A, B) 0 #else #error Unsupported platform #endif diff --git a/src/dynarec/dynarec_helper.h b/src/dynarec/dynarec_helper.h index 1f567be2..9997fce2 100644 --- a/src/dynarec/dynarec_helper.h +++ b/src/dynarec/dynarec_helper.h @@ -184,6 +184,8 @@ #define SMDMB() DMB_ISH() #endif +int is_addr_unaligned(uintptr_t addr); + #ifdef ARM64 #include "arm64/dynarec_arm64_helper.h" #elif defined(LA64) diff --git a/src/libtools/signals.c b/src/libtools/signals.c index f4ac7fc2..ca1bd291 100644 --- a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -32,6 +32,7 @@ #include "emu/x87emu_private.h" #include "custommem.h" #include "bridge.h" +#include "khash.h" #ifdef DYNAREC #include "dynablock.h" #include "../dynarec/dynablock_private.h" @@ -655,10 +656,43 @@ void copyUCTXreg2Emu(x64emu_t* emu, ucontext_t* p, uintptr_t ip) { #endif } -int sigbus_specialcases(siginfo_t* info, void * ucntx, void* pc, void* _fpsimd) +KHASH_SET_INIT_INT64(unaligned) +static kh_unaligned_t *unaligned = NULL; + +void add_unaligned_address(uintptr_t addr) +{ + if(!unaligned) + unaligned = kh_init(unaligned); + khint_t k; + int ret; + k = kh_put(unaligned, unaligned, addr, &ret); // just add +} + +int is_addr_unaligned(uintptr_t addr) +{ + if(!unaligned) + return 0; + khint_t k = kh_get(unaligned, unaligned, addr); + return (k==kh_end(unaligned))?0:1; +} + +int mark_db_unaligned(dynablock_t* db, uintptr_t x64pc) +{ + add_unaligned_address(x64pc); + db->hash++; // dirty the block + MarkDynablock(db); // and mark it +if(box64_showsegv) printf_log(LOG_INFO, "Marked db %p as dirty, and address %p as needing unaligned handling\n", db, (void*)x64pc); + return 2; // marked, exit handling... 
+} + + +int sigbus_specialcases(siginfo_t* info, void * ucntx, void* pc, void* _fpsimd, dynablock_t* db, uintptr_t x64pc) { if((uintptr_t)pc<0x10000) return 0; + + if(ARCH_UNALIGNED(db, x64pc)) + /*return*/ mark_db_unaligned(db, x64pc); // don't force an exit for now #ifdef ARM64 ucontext_t *p = (ucontext_t *)ucntx; uint32_t opcode = *(uint32_t*)pc; @@ -978,6 +1012,7 @@ int sigbus_specialcases(siginfo_t* info, void * ucntx, void* pc, void* _fpsimd) #undef SIGN_EXT #endif return 0; +#undef CHECK } #ifdef BOX32 @@ -1482,19 +1517,32 @@ void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx) void* fpsimd = NULL; #warning Unhandled architecture #endif - if((sig==SIGBUS) && (addr!=pc) && sigbus_specialcases(info, ucntx, pc, fpsimd)) { - // special case fixed, restore everything and just continues - if(box64_log>=LOG_DEBUG || box64_showsegv) { - static void* old_pc[2] = {0}; - static int old_pc_i = 0; - if(old_pc[0]!=pc && old_pc[1]!=pc) { - old_pc[old_pc_i++] = pc; - if(old_pc_i==2) - old_pc_i = 0; - printf_log(LOG_INFO, "Special unalinged cased fixed @%p, opcode=%08x (addr=%p)\n", pc, *(uint32_t*)pc, addr); + dynablock_t* db = NULL; + int db_searched = 0; + if((sig==SIGBUS) && (addr!=pc)) { + db = FindDynablockFromNativeAddress(pc); + db_searched = 1; + uint8_t* x64pc = NULL; + if(db) + x64pc = (uint8_t*)getX64Address(db, (uintptr_t)pc); + int fixed = 0; + if((fixed=sigbus_specialcases(info, ucntx, pc, fpsimd, db, (uintptr_t)x64pc))) { + // special case fixed, restore everything and just continues + if(box64_log>=LOG_DEBUG || box64_showsegv) { + static void* old_pc[2] = {0}; + static int old_pc_i = 0; + if(old_pc[0]!=pc && old_pc[1]!=pc) { + old_pc[old_pc_i++] = pc; + if(old_pc_i==2) + old_pc_i = 0; + if(db) + printf_log(LOG_INFO, "Special unalinged case fixed @%p, opcode=%08x (addr=%p, db=%p, x64pc=%p[%02hhX %02hhX %02hhX %02hhX %02hhX])\n", pc, *(uint32_t*)pc, addr, db, x64pc, x64pc[0], x64pc[1], x64pc[2], x64pc[3], x64pc[4], x64pc[5]); + else + 
printf_log(LOG_INFO, "Special unalinged case fixed @%p, opcode=%08x (addr=%p)\n", pc, *(uint32_t*)pc, addr); + } } + return; } - return; } int Locks = unlockMutex(); uint32_t prot = getProtection((uintptr_t)addr); @@ -1507,20 +1555,29 @@ void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx) } #endif #ifdef RV64 - if((sig==SIGSEGV) && (addr==pc) && (info->si_code==2) && (prot==(PROT_READ|PROT_WRITE|PROT_EXEC)) && sigbus_specialcases(info, ucntx, pc, fpsimd)) { - // special case fixed, restore everything and just continues - if(box64_log >= LOG_DEBUG || box64_showsegv) { - static void* old_pc[2] = {0}; - static int old_pc_i = 0; - if(old_pc[0]!=pc && old_pc[1]!=pc) { - old_pc[old_pc_i++] = pc; - if(old_pc_i==2) - old_pc_i = 0; - printf_log(LOG_NONE, "Special unalinged cased fixed @%p, opcode=%08x (addr=%p)\n", pc, *(uint32_t *)pc, addr); + if((sig==SIGSEGV) && (addr==pc) && (info->si_code==2) && (prot==(PROT_READ|PROT_WRITE|PROT_EXEC))) { + if(!db_searched) + db = FindDynablockFromNativeAddress(pc); + db_searched = 1; + uint8_t* x64pc = NULL; + if(db) + x64pc = (uint8_t*)getX64Address(db, (uintptr_t)pc); + int fixed = 0; + if((fixed = sigbus_specialcases(info, ucntx, pc, fpsimd, db, (uintptr_t)x64pc))) { + // special case fixed, restore everything and just continues + if(box64_log >= LOG_DEBUG || box64_showsegv) { + static void* old_pc[2] = {0}; + static int old_pc_i = 0; + if(old_pc[0]!=pc && old_pc[1]!=pc) { + old_pc[old_pc_i++] = pc; + if(old_pc_i==2) + old_pc_i = 0; + printf_log(LOG_NONE, "Special unalinged cased fixed @%p, opcode=%08x (addr=%p)\n", pc, *(uint32_t *)pc, addr); + } } + relockMutex(Locks); + return; } - relockMutex(Locks); - return; } #endif #ifdef DYNAREC @@ -1531,12 +1588,11 @@ void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx) cancelFillBlock(); // Segfault inside a Fillblock, cancel it's creation... 
// cancelFillBlock does not return } - dynablock_t* db = NULL; - int db_searched = 0; if ((sig==SIGSEGV) && (addr) && (info->si_code == SEGV_ACCERR) && (prot&PROT_DYNAREC)) { lock_signal(); // check if SMC inside block - db = FindDynablockFromNativeAddress(pc); + if(!db_searched) + db = FindDynablockFromNativeAddress(pc); db_searched = 1; // access error, unprotect the block (and mark them dirty) unprotectDB((uintptr_t)addr, 1, 1); // unprotect 1 byte... But then, the whole page will be unprotected |