| field | value | date |
|---|---|---|
| author | ptitSeb <sebastien.chev@gmail.com> | 2025-01-11 12:38:23 +0100 |
| committer | ptitSeb <sebastien.chev@gmail.com> | 2025-01-11 12:38:35 +0100 |
| commit | 35a68282097ec90c3dcd473402e234aa8a6ba4a9 (patch) | |
| tree | b3b3bcbd20ddaac7af0f9b5b730402cb4a20537c /src/dynarec | |
| parent | 2b66675a08f7f56eb5840330247484f56cdf685a (diff) | |
| download | box64-35a68282097ec90c3dcd473402e234aa8a6ba4a9.tar.gz, box64-35a68282097ec90c3dcd473402e234aa8a6ba4a9.zip | |
[ARM64_DYNAREC] Better handling of unaligned access to device memory, with regeneration of code
Diffstat (limited to 'src/dynarec')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/arm64/arm64_emitter.h | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_00.c | 48 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_0f.c | 16 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_arch.c | 29 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_arch.h | 5 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_helper.h | 3 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_pass0.h | 2 |
| -rw-r--r-- | src/dynarec/arm64/dynarec_arm64_private.h | 1 |
| -rw-r--r-- | src/dynarec/dynarec_arch.h | 3 |
| -rw-r--r-- | src/dynarec/dynarec_helper.h | 2 |
10 files changed, 95 insertions, 16 deletions
```diff
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index be7bf0c7..8d604f20 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -927,10 +927,12 @@ int convert_bitmask(uint64_t bitmask);
 #define VLD64(A, B, C)  if(unscaled) {VLDR64_I9(A, B, C);} else {VLDR64_U12(A, B, C);}
 #define VLD32(A, B, C)  if(unscaled) {VLDR32_I9(A, B, C);} else {VLDR32_U12(A, B, C);}
 #define VLD16(A, B, C)  if(unscaled) {VLDR16_I9(A, B, C);} else {VLDR16_U12(A, B, C);}
+#define VLD8(A, B, C)   if(unscaled) {VLDR8_I9(A, B, C);} else {VLDR8_U12(A, B, C);}
 #define VST128(A, B, C) if(unscaled) {VSTR128_I9(A, B, C);} else {VSTR128_U12(A, B, C);}
 #define VST64(A, B, C)  if(unscaled) {VSTR64_I9(A, B, C);} else {VSTR64_U12(A, B, C);}
 #define VST32(A, B, C)  if(unscaled) {VSTR32_I9(A, B, C);} else {VSTR32_U12(A, B, C);}
 #define VST16(A, B, C)  if(unscaled) {VSTR16_I9(A, B, C);} else {VSTR16_U12(A, B, C);}
+#define VST8(A, B, C)   if(unscaled) {VSTR8_I9(A, B, C);} else {VSTR8_U12(A, B, C);}
 
 #define VMEMW_gen(size, opc, imm9, op2, Rn, Rt) ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt))
 #define VLDR64_S9_postindex(Rt, Rn, imm9) EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b01, Rn, Rt))
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index c7766298..e387b8bc 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -1412,8 +1412,21 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(MODREG) {   // reg <= reg
                 MOVxw_REG(TO_NAT((nextop & 7) + (rex.b << 3)), gd);
             } else {    // mem <= reg
-                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff << (2 + rex.w), (1 << (2 + rex.w)) - 1, rex, &lock, 0, 0);
-                STxw(gd, ed, fixedaddress);
+                IF_UNALIGNED(ip) {
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, &lock, 0, 0);
+                    if(gd==wback) {
+                        MOVx_REG(x2, wback);
+                        wback = x2;
+                    }
+                    for(int i=0; i<(1<<(2+rex.w)); ++i) {
+                        STURB_I9(gd, wback, i);
+                        RORxw(gd, gd, 8);
+                    }
+                    // gd restored after that
+                } else {
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff << (2 + rex.w), (1 << (2 + rex.w)) - 1, rex, &lock, 0, 0);
+                    STxw(gd, ed, fixedaddress);
+                }
                 SMWRITELOCK(lock);
             }
             break;
@@ -2376,14 +2389,29 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 ed = TO_NAT((nextop & 7) + (rex.b << 3));
                 MOV64xw(ed, i64);
             } else {    // mem <= i32
-                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 4);
-                i64 = F32S;
-                if(i64) {
-                    MOV64xw(x3, i64);
-                    ed = x3;
-                } else
-                    ed = xZR;
-                STxw(ed, wback, fixedaddress);
+                IF_UNALIGNED(ip) {
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, &lock, 0, 4);
+                    i64 = F32S;
+                    if(i64) {
+                        MOV64xw(x3, i64);
+                        ed = x3;
+                    } else
+                        ed = xZR;
+                    for(int i=0; i<(1<<(2+rex.w)); ++i) {
+                        STURB_I9(ed, wback, i);
+                        if(ed!=xZR)
+                            RORxw(ed, ed, 8);
+                    }
+                } else {
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 4);
+                    i64 = F32S;
+                    if(i64) {
+                        MOV64xw(x3, i64);
+                        ed = x3;
+                    } else
+                        ed = xZR;
+                    STxw(ed, wback, fixedaddress);
+                }
                 SMWRITELOCK(lock);
             }
             break;
```
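The core of the change is visible in the two `dynarec_arm64_00.c` hunks above: for opcodes flagged for unaligned access, a single 32/64-bit store is replaced by a loop of single-byte `STURB` stores, rotating the source register right by 8 bits after each byte. Because the rotations add up to the full register width, the source register comes back to its original value (the `// gd restored after that` comment); only when `gd` aliases the address register is a scratch copy needed. A minimal C model of the pattern follows; it is illustration only, not box64 source, and `store_unaligned_u64` is a made-up name:

```c
#include <stdint.h>
#include <stdio.h>

/* Model of the STURB_I9 + RORxw loop for a 64-bit store (rex.w case):
 * store one byte at a time, rotating right by 8 after each byte.
 * After 8 rotations of 8 bits the register has rotated a full 64 bits,
 * so it ends up unchanged. */
static void store_unaligned_u64(uint8_t *dst, uint64_t gd)
{
    for (int i = 0; i < 8; ++i) {
        dst[i] = (uint8_t)gd;         /* STURB_I9(gd, wback, i): store low byte */
        gd = (gd >> 8) | (gd << 56);  /* RORxw(gd, gd, 8) */
    }
    /* gd holds its original value again here */
}

int main(void)
{
    uint8_t buf[9];
    store_unaligned_u64(buf + 1, 0x1122334455667788ull); /* deliberately unaligned */
    printf("%02x %02x\n", buf[1], buf[8]); /* 88 11: little-endian order preserved */
    return 0;
}
```

The byte order written matches a normal little-endian wide store, so the only observable difference on device memory is that every access is a naturally aligned single byte.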
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index febb75a3..9bca36f7 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -260,8 +260,20 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v1 = sse_get_reg_empty(dyn, ninst, x1, ed);
                 VMOVQ(v1, v0);
             } else {
-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0);
-                VST128(v0, ed, fixedaddress);
+                IF_UNALIGNED(ip) {
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);
+                    if(wback!=x1) {
+                        MOVx_REG(x1, wback);
+                        wback = x1;
+                    }
+                    for(int i=0; i<16; ++i) {
+                        VST1_8(v0, i, wback);
+                        ADDx_U12(wback, wback, 1);
+                    }
+                } else {
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0);
+                    VST128(v0, ed, fixedaddress);
+                }
                 SMWRITE2();
             }
             break;
diff --git a/src/dynarec/arm64/dynarec_arm64_arch.c b/src/dynarec/arm64/dynarec_arm64_arch.c
index fae233ce..43cd68da 100644
--- a/src/dynarec/arm64/dynarec_arm64_arch.c
+++ b/src/dynarec/arm64/dynarec_arm64_arch.c
@@ -21,6 +21,7 @@ typedef struct arch_build_s
     uint8_t mmx:1;
     uint8_t sse:1;
     uint8_t ymm:1;
+    uint8_t unaligned;
     arch_flags_t flags_;
     arch_x87_t x87_;
     arch_mmx_t mmx_;
@@ -48,7 +49,8 @@ static int arch_build(dynarec_arm_t* dyn, int ninst, arch_build_t* arch)
         arch->sse = 1;
         arch->sse_.sse |= 1<<i;
     }
-    return arch->flags + arch->x87 + arch->mmx + arch->sse + arch->ymm;
+    arch->unaligned = dyn->insts[ninst].unaligned;
+    return arch->flags + arch->x87 + arch->mmx + arch->sse + arch->ymm + arch->unaligned;
 }
 
 size_t get_size_arch(dynarec_arm_t* dyn)
@@ -62,7 +64,7 @@ size_t get_size_arch(dynarec_arm_t* dyn)
     if(!dyn->size) return 0;
     for(int i=0; i<dyn->size; ++i) {
         last = arch_build(dyn, i, &build);
-        if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<11)-1)) && i) {
+        if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<10)-1)) && i) {
             // same sequence, increment
             ++seq;
         } else {
@@ -89,6 +91,7 @@ static void build_next(arch_arch_t* arch, arch_build_t* build)
     arch->mmx = build->mmx;
     arch->sse = build->sse;
     arch->ymm = build->ymm;
+    arch->unaligned = build->unaligned;
     arch->seq = 0;
     void* p = ((void*)arch)+sizeof(arch_arch_t);
     #define GO(A) \
@@ -126,7 +129,7 @@ void populate_arch(dynarec_arm_t* dyn, void* p)
     int seq = 0;
     for(int i=0; i<dyn->size; ++i) {
         arch_build(dyn, i, &build);
-        if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<11)-1)) && i) {
+        if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<10)-1)) && i) {
             // same sequence, increment
             seq++;
             arch->seq = seq;
@@ -230,4 +233,24 @@ void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t x64pc)
         }
     }
     dynarec_log(LOG_INFO, "\n");
+}
+
+int arch_unaligned(dynablock_t* db, uintptr_t x64pc)
+{
+    if(!db->arch_size || !db->arch)
+        return 0;
+    int ninst = getX64AddressInst(db, x64pc);
+    if(ninst<0) {
+        return 0;
+    }
+    // look for state at ninst
+    arch_arch_t* arch = db->arch;
+    arch_arch_t* next = arch;
+    int i = -1;
+    while(i<ninst) {
+        arch = next;
+        i += 1+arch->seq;
+        next = (arch_arch_t*)((uintptr_t)next + sizeof_arch(arch));
+    }
+    return arch->unaligned;
 }
\ No newline at end of file
diff --git a/src/dynarec/arm64/dynarec_arm64_arch.h b/src/dynarec/arm64/dynarec_arm64_arch.h
index 68f65d8a..84392325 100644
--- a/src/dynarec/arm64/dynarec_arm64_arch.h
+++ b/src/dynarec/arm64/dynarec_arm64_arch.h
@@ -50,7 +50,8 @@ typedef struct arch_arch_s
     uint16_t mmx:1;
     uint16_t sse:1;
     uint16_t ymm:1;
-    uint16_t seq:11;    // how many instruction on the same values
+    uint16_t unaligned:1;
+    uint16_t seq:10;    // how many instruction on the same values
 } arch_arch_t;
 
 // get size of arch specific info (can be 0)
@@ -59,4 +60,6 @@ size_t get_size_arch(dynarec_arm_t* dyn);
 void populate_arch(dynarec_arm_t* dyn, void* p);
 //adjust flags and more
 void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t native_addr);
+// get if instruction can be regenerated for unaligned access
+int arch_unaligned(dynablock_t* db, uintptr_t x64pc);
 #endif //__DYNAREC_ARM_ARCH_H__
```
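The arch records above are run-length encoded: one `arch_arch_t` covers `1 + seq` consecutive instructions with identical values, which is why stealing one bit for `unaligned` forces both `(1<<11)-1` sequence caps down to `(1<<10)-1`. A stand-alone sketch of the lookup that `arch_unaligned()` performs follows; it uses fixed-size records for simplicity, whereas the real records are variable-size and advanced with `sizeof_arch()`:

```c
#include <stdint.h>
#include <stdio.h>

/* Simplified, fixed-size stand-in for arch_arch_t (illustration only). */
typedef struct {
    uint16_t unaligned : 1; /* opcode has a byte-wise variant available */
    uint16_t seq : 10;      /* how many EXTRA instructions share this record */
} record_t;

/* Mirror of the walk in arch_unaligned(): each record accounts for
 * 1 + seq instructions, so advance until the running index reaches ninst. */
static record_t find_record(const record_t *recs, int ninst)
{
    const record_t *arch = recs, *next = recs;
    int i = -1;
    while (i < ninst) {
        arch = next;
        i += 1 + arch->seq; /* last instruction index covered by this record */
        ++next;             /* real code: next += sizeof_arch(arch) */
    }
    return *arch;
}

int main(void)
{
    /* Records covering instructions 0-2, 3, and 4-8. */
    record_t recs[] = { {0, 2}, {1, 0}, {0, 4} };
    printf("inst 3 unaligned=%d\n", find_record(recs, 3).unaligned); /* prints 1 */
    return 0;
}
```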
```diff
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 934f64a5..266005f2 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -977,6 +977,9 @@
     }                                   \
     SET_DFNONE(s1);                     \
 
+#ifndef IF_UNALIGNED
+#define IF_UNALIGNED(A) if(is_addr_unaligned(A))
+#endif
 
 #define STORE_REG(A)    STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define STP_REGS(A, B)  STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h
index e9d7bb7e..0f096482 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass0.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass0.h
@@ -66,3 +66,5 @@
 #define IFNATIVE_BEFORE(A) if(mark_natflag(dyn, ninst, A, 1))
 #define INVERT_CARRY(A) dyn->insts[ninst].invert_carry = 1
 #define INVERT_CARRY_BEFORE(A) dyn->insts[ninst].invert_carry_before = 1
+// mark opcode as "unaligned" possible only if the current address is not marked as already unaligned
+#define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=(is_addr_unaligned(A)?0:1)))
\ No newline at end of file
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 8f10b0f5..b88a9ad1 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -122,6 +122,7 @@ typedef struct instruction_arm64_s {
     unsigned normal_carry_before:1;
     unsigned invert_carry:1;    // this opcode force an inverted carry
     unsigned df_notneeded:1;
+    unsigned unaligned:1;   // this opcode can be re-generated for unaligned special case
     flagcache_t f_exit;     // flags status at end of instruction
     neoncache_t n;          // neoncache at end of instruction (but before poping)
     flagcache_t f_entry;    // flags status before the instruction begin
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index 1b619e1a..85ec3ef4 100644
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -29,6 +29,7 @@
 #define ARCH_FILL(A, B) populate_arch(A, B)
 #define ARCH_ADJUST(A, B, C, D) adjust_arch(A, B, C, D)
 #define STOP_NATIVE_FLAGS(A, B) A->insts[B].nat_flags_op = NAT_FLAG_OP_UNUSABLE
+#define ARCH_UNALIGNED(A, B) arch_unaligned(A, B)
 
 #elif defined(LA64)
 #define instruction_native_t instruction_la64_t
@@ -55,6 +56,7 @@
 #define ARCH_FILL(A, B) {}
 #define ARCH_ADJUST(A, B, C, D) {}
 #define STOP_NATIVE_FLAGS(A, B) {}
+#define ARCH_UNALIGNED(A, B) 0
 
 #elif defined(RV64)
 #define instruction_native_t instruction_rv64_t
@@ -83,6 +85,7 @@
 #define ARCH_FILL(A, B) {}
 #define ARCH_ADJUST(A, B, C, D) {}
 #define STOP_NATIVE_FLAGS(A, B) {}
+#define ARCH_UNALIGNED(A, B) 0
 
 #else
 #error Unsupported platform
 #endif
diff --git a/src/dynarec/dynarec_helper.h b/src/dynarec/dynarec_helper.h
index 1f567be2..9997fce2 100644
--- a/src/dynarec/dynarec_helper.h
+++ b/src/dynarec/dynarec_helper.h
@@ -184,6 +184,8 @@
 #define SMDMB() DMB_ISH()
 #endif
 
+int is_addr_unaligned(uintptr_t addr);
+
 #ifdef ARM64
 #include "arm64/dynarec_arm64_helper.h"
 #elif defined(LA64)
```
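How the pieces interact at runtime is only partly visible in this diff. In pass 0 `IF_UNALIGNED` records in `insts[ninst].unaligned` that the opcode has a byte-wise variant (unless the address is already flagged), in the emission passes it tests `is_addr_unaligned()` to pick that variant, and `ARCH_UNALIGNED()`/`arch_unaligned()` lets a fault handler ask whether regenerating the block would change anything. The sketch below is a hedged guess at that flow; the handler side is not part of this commit, and `on_unaligned_fault`, `mark_addr_unaligned` and `regenerate_dynablock` are hypothetical stand-ins:

```c
#include <stdint.h>

/* Hypothetical glue, NOT box64 source: only arch_unaligned() comes from the diff. */
typedef struct dynablock_s dynablock_t;
int  arch_unaligned(dynablock_t* db, uintptr_t x64pc); /* from the diff (ARM64; 0 on LA64/RV64) */
void mark_addr_unaligned(uintptr_t addr);              /* hypothetical: makes is_addr_unaligned() true */
void regenerate_dynablock(dynablock_t* db);            /* hypothetical: rebuilds the block */

static void on_unaligned_fault(dynablock_t* db, uintptr_t x64pc)
{
    /* Did pass 0 record that the faulting opcode has a byte-wise variant? */
    if (arch_unaligned(db, x64pc)) {
        mark_addr_unaligned(x64pc); /* flag the x86 address... */
        regenerate_dynablock(db);   /* ...so the next build takes the IF_UNALIGNED
                                       path (STURB/ROR, VST1_8) instead of the
                                       wide STxw/VST128 stores */
    }
    /* otherwise: keep whatever handling existed before this commit */
}
```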