about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
author: ptitSeb <sebastien.chev@gmail.com> 2025-01-11 12:38:23 +0100
committer: ptitSeb <sebastien.chev@gmail.com> 2025-01-11 12:38:35 +0100
commit: 35a68282097ec90c3dcd473402e234aa8a6ba4a9 (patch)
tree: b3b3bcbd20ddaac7af0f9b5b730402cb4a20537c /src/dynarec
parent: 2b66675a08f7f56eb5840330247484f56cdf685a (diff)
download: box64-35a68282097ec90c3dcd473402e234aa8a6ba4a9.tar.gz
download: box64-35a68282097ec90c3dcd473402e234aa8a6ba4a9.zip
[ARM64_DYNAREC] Better handling of unaligned access to device memory, with regeneration of code
Diffstat (limited to 'src/dynarec')
-rw-r--r--src/dynarec/arm64/arm64_emitter.h2
-rw-r--r--src/dynarec/arm64/dynarec_arm64_00.c48
-rw-r--r--src/dynarec/arm64/dynarec_arm64_0f.c16
-rw-r--r--src/dynarec/arm64/dynarec_arm64_arch.c29
-rw-r--r--src/dynarec/arm64/dynarec_arm64_arch.h5
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.h3
-rw-r--r--src/dynarec/arm64/dynarec_arm64_pass0.h2
-rw-r--r--src/dynarec/arm64/dynarec_arm64_private.h1
-rw-r--r--src/dynarec/dynarec_arch.h3
-rw-r--r--src/dynarec/dynarec_helper.h2
10 files changed, 95 insertions, 16 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index be7bf0c7..8d604f20 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -927,10 +927,12 @@ int convert_bitmask(uint64_t bitmask);
 #define VLD64(A, B, C)      if(unscaled) {VLDR64_I9(A, B, C);} else {VLDR64_U12(A, B, C);}
 #define VLD32(A, B, C)      if(unscaled) {VLDR32_I9(A, B, C);} else {VLDR32_U12(A, B, C);}
 #define VLD16(A, B, C)      if(unscaled) {VLDR16_I9(A, B, C);} else {VLDR16_U12(A, B, C);}
+#define VLD8(A, B, C)       if(unscaled) {VLDR8_I9(A, B, C);} else {VLDR8_U12(A, B, C);}
 #define VST128(A, B, C)     if(unscaled) {VSTR128_I9(A, B, C);} else {VSTR128_U12(A, B, C);}
 #define VST64(A, B, C)      if(unscaled) {VSTR64_I9(A, B, C);} else {VSTR64_U12(A, B, C);}
 #define VST32(A, B, C)      if(unscaled) {VSTR32_I9(A, B, C);} else {VSTR32_U12(A, B, C);}
 #define VST16(A, B, C)      if(unscaled) {VSTR16_I9(A, B, C);} else {VSTR16_U12(A, B, C);}
+#define VST8(A, B, C)       if(unscaled) {VSTR8_I9(A, B, C);} else {VSTR8_U12(A, B, C);}
 
 #define VMEMW_gen(size, opc, imm9, op2, Rn, Rt)  ((size)<<30 | 0b111<<27 | 1<<26 | (opc)<<22 | (imm9)<<12 | (op2)<<10 | 0b01<<10 | (Rn)<<5 | (Rt))
 #define VLDR64_S9_postindex(Rt, Rn, imm9)   EMIT(VMEMW_gen(0b11, 0b01, (imm9)&0x1ff, 0b01, Rn, Rt))
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index c7766298..e387b8bc 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -1412,8 +1412,21 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(MODREG) {   // reg <= reg
                 MOVxw_REG(TO_NAT((nextop & 7) + (rex.b << 3)), gd);
             } else {                    // mem <= reg
-                addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff << (2 + rex.w), (1 << (2 + rex.w)) - 1, rex, &lock, 0, 0);
-                STxw(gd, ed, fixedaddress);
+                IF_UNALIGNED(ip) {
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, &lock, 0, 0);
+                    if(gd==wback) {
+                        MOVx_REG(x2, wback);
+                        wback = x2;
+                    }
+                    for(int i=0; i<(1<<(2+rex.w)); ++i) {
+                        STURB_I9(gd, wback, i);
+                        RORxw(gd, gd, 8);
+                    }
+                    // gd restored after that
+                } else {
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff << (2 + rex.w), (1 << (2 + rex.w)) - 1, rex, &lock, 0, 0);
+                    STxw(gd, ed, fixedaddress);
+                }
                 SMWRITELOCK(lock);
             }
             break;
@@ -2376,14 +2389,29 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 ed = TO_NAT((nextop & 7) + (rex.b << 3));
                 MOV64xw(ed, i64);
             } else {                    // mem <= i32
-                addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 4);
-                i64 = F32S;
-                if(i64) {
-                    MOV64xw(x3, i64);
-                    ed = x3;
-                } else
-                    ed = xZR;
-                STxw(ed, wback, fixedaddress);
+                IF_UNALIGNED(ip) {
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, NULL, 0, 0, rex, &lock, 0, 4);
+                    i64 = F32S;
+                    if(i64) {
+                        MOV64xw(x3, i64);
+                        ed = x3;
+                    } else
+                        ed = xZR;
+                    for(int i=0; i<(1<<(2+rex.w)); ++i) {
+                        STURB_I9(ed, wback, i);
+                        if(ed!=xZR)
+                            RORxw(ed, ed, 8);
+                    }
+                } else {
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, &lock, 0, 4);
+                    i64 = F32S;
+                    if(i64) {
+                        MOV64xw(x3, i64);
+                        ed = x3;
+                    } else
+                        ed = xZR;
+                    STxw(ed, wback, fixedaddress);
+                }
                 SMWRITELOCK(lock);
             }
             break;
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index febb75a3..9bca36f7 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -260,8 +260,20 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 v1 = sse_get_reg_empty(dyn, ninst, x1, ed);

                 VMOVQ(v1, v0);

             } else {

-                addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0);

-                VST128(v0, ed, fixedaddress);

+                IF_UNALIGNED(ip) {

+                    addr = geted(dyn, addr, ninst, nextop, &wback, x1, &fixedaddress, NULL, 0, 0, rex, NULL, 0, 0);

+                    if(wback!=x1) {

+                        MOVx_REG(x1, wback);

+                        wback = x1;

+                    }

+                    for(int i=0; i<16; ++i) {

+                        VST1_8(v0, i, wback);

+                        ADDx_U12(wback, wback, 1);

+                    }

+                } else {

+                    addr = geted(dyn, addr, ninst, nextop, &ed, x1, &fixedaddress, &unscaled, 0xfff<<4, 15, rex, NULL, 0, 0);

+                    VST128(v0, ed, fixedaddress);

+                }

                 SMWRITE2();

             }

             break;

diff --git a/src/dynarec/arm64/dynarec_arm64_arch.c b/src/dynarec/arm64/dynarec_arm64_arch.c
index fae233ce..43cd68da 100644
--- a/src/dynarec/arm64/dynarec_arm64_arch.c
+++ b/src/dynarec/arm64/dynarec_arm64_arch.c
@@ -21,6 +21,7 @@ typedef struct arch_build_s
     uint8_t mmx:1;
     uint8_t sse:1;
     uint8_t ymm:1;
+    uint8_t unaligned;
     arch_flags_t flags_;
     arch_x87_t x87_;
     arch_mmx_t mmx_;
@@ -48,7 +49,8 @@ static int arch_build(dynarec_arm_t* dyn, int ninst, arch_build_t* arch)
             arch->sse = 1;
             arch->sse_.sse |= 1<<i;
         }
-    return arch->flags + arch->x87 + arch->mmx + arch->sse + arch->ymm;
+    arch->unaligned = dyn->insts[ninst].unaligned;
+    return arch->flags + arch->x87 + arch->mmx + arch->sse + arch->ymm + arch->unaligned;
 }
 
 size_t get_size_arch(dynarec_arm_t* dyn)
@@ -62,7 +64,7 @@ size_t get_size_arch(dynarec_arm_t* dyn)
     if(!dyn->size) return 0;
     for(int i=0; i<dyn->size; ++i) {
         last = arch_build(dyn, i, &build);
-        if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<11)-1)) && i) {
+        if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<10)-1)) && i) {
             // same sequence, increment
             ++seq;
         } else {
@@ -89,6 +91,7 @@ static void build_next(arch_arch_t* arch, arch_build_t* build)
     arch->mmx = build->mmx;
     arch->sse = build->sse;
     arch->ymm = build->ymm;
+    arch->unaligned = build->unaligned;
     arch->seq = 0;
     void* p = ((void*)arch)+sizeof(arch_arch_t);
     #define GO(A)                                           \
@@ -126,7 +129,7 @@ void populate_arch(dynarec_arm_t* dyn, void* p)
     int seq = 0;
     for(int i=0; i<dyn->size; ++i) {
         arch_build(dyn, i, &build);
-        if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<11)-1)) && i) {
+        if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<10)-1)) && i) {
             // same sequence, increment
             seq++;
             arch->seq = seq;
@@ -230,4 +233,24 @@ void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t x64pc)
             }
     }
     dynarec_log(LOG_INFO, "\n");
+}
+
+int arch_unaligned(dynablock_t* db, uintptr_t x64pc)
+{
+    if(!db->arch_size || !db->arch)
+        return 0;
+    int ninst = getX64AddressInst(db, x64pc);
+    if(ninst<0) {
+        return 0;
+    }
+    // look for state at ninst
+    arch_arch_t* arch = db->arch;
+    arch_arch_t* next = arch;
+    int i = -1;
+    while(i<ninst) {
+        arch = next;
+        i += 1+arch->seq;
+        next = (arch_arch_t*)((uintptr_t)next + sizeof_arch(arch));
+    }
+    return arch->unaligned;
 }
\ No newline at end of file
diff --git a/src/dynarec/arm64/dynarec_arm64_arch.h b/src/dynarec/arm64/dynarec_arm64_arch.h
index 68f65d8a..84392325 100644
--- a/src/dynarec/arm64/dynarec_arm64_arch.h
+++ b/src/dynarec/arm64/dynarec_arm64_arch.h
@@ -50,7 +50,8 @@ typedef struct arch_arch_s
     uint16_t mmx:1;
     uint16_t sse:1;
     uint16_t ymm:1;
-    uint16_t seq:11;    // how many instruction on the same values
+    uint16_t unaligned:1;
+    uint16_t seq:10;    // how many instruction on the same values
 } arch_arch_t;
 
 // get size of arch specific info (can be 0)
@@ -59,4 +60,6 @@ size_t get_size_arch(dynarec_arm_t* dyn);
 void populate_arch(dynarec_arm_t* dyn, void* p);
 //adjust flags and more
 void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t native_addr);
+// get if instruction can be regenerated for unaligned access
+int arch_unaligned(dynablock_t* db, uintptr_t x64pc);
 #endif // __DYNAREC_ARM_ARCH_H__
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 934f64a5..266005f2 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -977,6 +977,9 @@
     }                                                                       \
     SET_DFNONE(s1);                                                         \
 
+#ifndef IF_UNALIGNED
+#define IF_UNALIGNED(A)    if(is_addr_unaligned(A))
+#endif
 
 #define STORE_REG(A)    STRx_U12(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define STP_REGS(A, B)  STPx_S7_offset(x##A, x##B, xEmu, offsetof(x64emu_t, regs[_##A]))
diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h
index e9d7bb7e..0f096482 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass0.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass0.h
@@ -66,3 +66,5 @@
 #define IFNATIVE_BEFORE(A)     if(mark_natflag(dyn, ninst, A, 1))
 #define INVERT_CARRY(A) dyn->insts[ninst].invert_carry = 1
 #define INVERT_CARRY_BEFORE(A) dyn->insts[ninst].invert_carry_before = 1
+// mark opcode as "unaligned" possible only if the current address is not marked as already unaligned
+#define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=(is_addr_unaligned(A)?0:1)))
\ No newline at end of file
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 8f10b0f5..b88a9ad1 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -122,6 +122,7 @@ typedef struct instruction_arm64_s {
     unsigned            normal_carry_before:1;
     unsigned            invert_carry:1; // this opcode force an inverted carry
     unsigned            df_notneeded:1;
+    unsigned            unaligned:1;    // this opcode can be re-generated for unaligned special case
     flagcache_t         f_exit;     // flags status at end of instruction
     neoncache_t         n;          // neoncache at end of instruction (but before poping)
     flagcache_t         f_entry;    // flags status before the instruction begin
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index 1b619e1a..85ec3ef4 100644
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -29,6 +29,7 @@
 #define ARCH_FILL(A, B) populate_arch(A, B)

 #define ARCH_ADJUST(A, B, C, D) adjust_arch(A, B, C, D)

 #define STOP_NATIVE_FLAGS(A, B)   A->insts[B].nat_flags_op = NAT_FLAG_OP_UNUSABLE

+#define ARCH_UNALIGNED(A, B) arch_unaligned(A, B)

 #elif defined(LA64)

 

 #define instruction_native_t        instruction_la64_t

@@ -55,6 +56,7 @@
 #define ARCH_FILL(A, B)  {}

 #define ARCH_ADJUST(A, B, C, D) {}

 #define STOP_NATIVE_FLAGS(A, B) {}

+#define ARCH_UNALIGNED(A, B) 0

 #elif defined(RV64)

 

 #define instruction_native_t        instruction_rv64_t

@@ -83,6 +85,7 @@
 #define ARCH_FILL(A, B)  {}

 #define ARCH_ADJUST(A, B, C, D) {}

 #define STOP_NATIVE_FLAGS(A, B) {}

+#define ARCH_UNALIGNED(A, B) 0

 #else

 #error Unsupported platform

 #endif

diff --git a/src/dynarec/dynarec_helper.h b/src/dynarec/dynarec_helper.h
index 1f567be2..9997fce2 100644
--- a/src/dynarec/dynarec_helper.h
+++ b/src/dynarec/dynarec_helper.h
@@ -184,6 +184,8 @@
 #define SMDMB() DMB_ISH()

 #endif

 

+int is_addr_unaligned(uintptr_t addr);

+

 #ifdef ARM64

 #include "arm64/dynarec_arm64_helper.h"

 #elif defined(LA64)