about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorxctan <xctan@cirno.icu>2025-01-24 15:10:45 +0800
committerGitHub <noreply@github.com>2025-01-24 08:10:45 +0100
commit8c1ffca530fb471984cd91347d93457575c7ee5d (patch)
tree66508b2cf993b209f5f54a2e146ba66257528e42 /src
parent7099774a3c82642ad64bc3a4f1b798e951904f7d (diff)
downloadbox64-8c1ffca530fb471984cd91347d93457575c7ee5d.tar.gz
box64-8c1ffca530fb471984cd91347d93457575c7ee5d.zip
[RV64_DYNAREC] Added codegen for unaligned stores (#2289)
* [RV64_DYNAREC] Added another special SIGBUS case

* [RV64_DYNAREC] Added codegen for unaligned stores
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/dynarec_arch.h7
-rw-r--r--src/dynarec/rv64/dynarec_rv64_00_2.c16
-rw-r--r--src/dynarec/rv64/dynarec_rv64_00_3.c34
-rw-r--r--src/dynarec/rv64/dynarec_rv64_0f.c32
-rw-r--r--src/dynarec/rv64/dynarec_rv64_arch.c231
-rw-r--r--src/dynarec/rv64/dynarec_rv64_arch.h20
-rw-r--r--src/dynarec/rv64/dynarec_rv64_f30f.c26
-rw-r--r--src/dynarec/rv64/dynarec_rv64_helper.h4
-rw-r--r--src/dynarec/rv64/dynarec_rv64_pass0.h3
-rw-r--r--src/dynarec/rv64/dynarec_rv64_private.h1
-rw-r--r--src/libtools/signals.c4
11 files changed, 356 insertions, 22 deletions
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index 85ec3ef4..a1af30c4 100644
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -74,6 +74,7 @@
 #include "rv64/rv64_printer.h"

 #include "rv64/dynarec_rv64_private.h"

 #include "rv64/dynarec_rv64_functions.h"

+#include "rv64/dynarec_rv64_arch.h"

 // Limit here is unconditionnal jump, that is signed 21bits

 #define MAXBLOCK_SIZE ((1<<20)-200)

 

@@ -81,11 +82,11 @@
 #define UPDATE_SPECIFICS(A)

 #define PREUPDATE_SPECIFICS(A) updateNativeFlags(A)

 

-#define ARCH_SIZE(A)    0

-#define ARCH_FILL(A, B)  {}

+#define ARCH_SIZE(A)    get_size_arch(A)

+#define ARCH_FILL(A, B) populate_arch(A, B)

 #define ARCH_ADJUST(A, B, C, D) {}

 #define STOP_NATIVE_FLAGS(A, B) {}

-#define ARCH_UNALIGNED(A, B) 0

+#define ARCH_UNALIGNED(A, B) arch_unaligned(A, B)

 #else

 #error Unsupported platform

 #endif

diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 4f994174..29898876 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -404,8 +404,20 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             if (MODREG) { // reg <= reg
                 MVxw(TO_NAT((nextop & 7) + (rex.b << 3)), gd);
             } else { // mem <= reg
-                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
-                SDxw(gd, ed, fixedaddress);
+                IF_UNALIGNED(ip) {
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, (1 << (2 + rex.w)) - 1, 0);
+                    for (int i = 0; i < (1 << (2 + rex.w)); i++) {
+                        if (i == 0) {
+                            SB(gd, ed, fixedaddress);
+                        } else {
+                            SRLI(x3, gd, i * 8);
+                            SB(x3, ed, fixedaddress + i);
+                        }
+                    }
+                } else {
+                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
+                    SDxw(gd, ed, fixedaddress);
+                }
                 SMWRITELOCK(lock);
             }
             break;
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index 072ed33e..1a5d4fd7 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -392,14 +392,32 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 ed = TO_NAT((nextop & 7) + (rex.b << 3));
                 MOV64xw(ed, i64);
             } else { // mem <= i32
-                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 1, 4);
-                i64 = F32S;
-                if (i64) {
-                    MOV64x(x3, i64);
-                    ed = x3;
-                } else
-                    ed = xZR;
-                SDxw(ed, wback, fixedaddress);
+                IF_UNALIGNED(ip) {
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, (1 << (2 + rex.w)) - 1, 4);
+                    i64 = F32S;
+                    if (i64) {
+                        MOV64x(x4, i64);
+                        ed = x4;
+                    } else
+                        ed = xZR;
+                    for (int i = 0; i < (1 << (2 + rex.w)); i++) {
+                        if (i == 0 || ed == xZR) {
+                            SB(ed, wback, fixedaddress + i);
+                        } else {
+                            SRLI(x3, ed, i * 8);
+                            SB(x3, wback, fixedaddress + i);
+                        }
+                    }
+                } else {
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 1, 4);
+                    i64 = F32S;
+                    if (i64) {
+                        MOV64x(x3, i64);
+                        ed = x3;
+                    } else
+                        ed = xZR;
+                    SDxw(ed, wback, fixedaddress);
+                }
                 SMWRITELOCK(lock);
             }
             break;
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index fd22aad4..d594a718 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -180,11 +180,33 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("MOVUPS Ex,Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0, 8);
-            LD(x3, gback, gdoffset + 0);
-            LD(x4, gback, gdoffset + 8);
-            SD(x3, wback, fixedaddress + 0);
-            SD(x4, wback, fixedaddress + 8);
+            IF_UNALIGNED(ip) {
+                GETEX(x2, 0, 15);
+                LD(x3, gback, gdoffset + 0);
+                LD(x4, gback, gdoffset + 8);
+                for (int i = 0; i < 8; i++) {
+                    if (i == 0) {
+                        SB(x3, wback, fixedaddress);
+                    } else {
+                        SRLI(x5, x3, i * 8);
+                        SB(x5, wback, fixedaddress + i);
+                    }
+                }
+                for (int i = 0; i < 8; i++) {
+                    if (i == 0) {
+                        SB(x4, wback, fixedaddress + 8);
+                    } else {
+                        SRLI(x5, x4, i * 8);
+                        SB(x5, wback, fixedaddress + i + 8);
+                    }
+                }
+            } else {
+                GETEX(x2, 0, 8);
+                LD(x3, gback, gdoffset + 0);
+                LD(x4, gback, gdoffset + 8);
+                SD(x3, wback, fixedaddress + 0);
+                SD(x4, wback, fixedaddress + 8);
+            }
             if (!MODREG)
                 SMWRITE2();
             break;
diff --git a/src/dynarec/rv64/dynarec_rv64_arch.c b/src/dynarec/rv64/dynarec_rv64_arch.c
new file mode 100644
index 00000000..25555c1f
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_arch.c
@@ -0,0 +1,231 @@
+#include <stddef.h>
+#include <stdio.h>
+#include <signal.h>
+#include <ucontext.h>
+#include <string.h>
+
+#include "debug.h"
+#include "dynablock.h"
+#include "x64emu.h"
+#include "emu/x64emu_private.h"
+#include "x64run.h"
+#include "emu/x64run_private.h"
+#include "dynarec/dynablock_private.h"
+#include "dynarec_rv64_arch.h"
+#include "dynarec_rv64_functions.h"
+#include "dynarec_rv64_private.h"
+
+// X-macro listing the optional state kinds; define GO(A) before expanding SUPER(). Order matters: the packed payloads are written and read back in this order.
+#define SUPER() \
+    GO(flags)   \
+    GO(x87)     \
+    GO(mmx)     \
+    GO(sse)     \
+    GO(ymm)     \
+
+typedef struct arch_flags_s // payload stored after the record header when its "flags" bit is set
+{
+    uint8_t ignore:1; // not read or written by the code visible in this file
+} arch_flags_t;
+
+#define X87_ST_D 0 // presumably the 2-bit codes kept in arch_x87_t.x87_type -- TODO confirm
+#define X87_ST_F 1
+#define X87_ST_I64 2
+#define XMM0 0
+#define X870 (XMM0 + 16) // parenthesized so the macro stays a single operand inside larger expressions
+#define EMM0 (XMM0 + 16) // same value as X870
+typedef struct arch_x87_s // payload stored after the record header when its "x87" bit is set
+{
+    int8_t delta;        // signed delta, up to +/-7
+    uint8_t x87;        // bitmask, 1 bit per STx: set when that STx is present
+    uint16_t x87_type;  // 2 bits per STx giving its type (presumably the X87_ST_* codes -- TODO confirm)
+    uint32_t x87_pos;   // 4 bits per STx giving its position (3 bits would be enough)
+} arch_x87_t;
+
+typedef struct arch_mmx_s // payload stored after the record header when its "mmx" bit is set
+{
+    uint8_t mmx;    // bitmask, 1 bit per mmx register: set when that register is present
+} arch_mmx_t;
+
+typedef struct arch_sse_s // payload stored after the record header when its "sse" bit is set
+{
+    uint16_t sse;   // bitmask, 1 bit per sse register: set when that register is present
+} arch_sse_t;
+
+typedef struct arch_ymm_s // payload stored after the record header when its "ymm" bit is set
+{
+    uint16_t ymm0;      // bitmask for the "ymm0" state (exact meaning not shown in this file -- TODO confirm)
+    uint16_t ymm;       // bitmask, 1 bit per ymm register: set when that register is present
+    uint64_t ymm_pos;   // 4 bits per present ymm register giving its position
+} arch_ymm_t;
+
+typedef struct arch_arch_s // header of one packed record; payloads for the set presence bits follow it in SUPER() order
+{
+    #define GO(A) uint16_t A:1; // one presence bit per SUPER() kind
+    SUPER()
+    #undef GO
+    uint16_t unaligned:1; // copy of the per-instruction "unaligned" marker
+    uint16_t seq:10;    // number of additional consecutive instructions sharing this record (it covers 1+seq instructions)
+} arch_arch_t;
+
+typedef struct arch_build_s // unpacked, fixed-size form of one instruction's state; compared with memcmp() to group instructions
+{
+    #define GO(A) uint8_t A:1; // one presence bit per SUPER() kind
+    SUPER()
+    #undef GO
+    uint8_t unaligned; // full byte here, narrowed to 1 bit in arch_arch_t
+    #define GO(A) arch_##A##_t A##_; // one payload slot per SUPER() kind, named <kind>_
+    SUPER()
+    #undef GO
+} arch_build_t;
+
+static int arch_build(dynarec_rv64_t* dyn, int ninst, arch_build_t* arch) // fill *arch with the state of instruction ninst; returns non-zero when any field is set
+{
+    memset(arch, 0, sizeof(arch_build_t)); // zero every byte, padding included, because callers memcmp() whole structs
+    // todo: nothing fills flags/x87/mmx/sse/ymm yet, so those fields stay 0
+    // whether this opcode can be regenerated for unaligned access
+    arch->unaligned = dyn->insts[ninst].unaligned;
+    return arch->flags + arch->x87 + arch->mmx + arch->sse + arch->ymm + arch->unaligned;
+}
+
+size_t get_size_arch(dynarec_rv64_t* dyn) // total byte size of the packed records for dyn (same grouping rule as populate_arch), or 0 when nothing is worth storing
+{
+    arch_build_t build = {0};
+    arch_build_t previous = {0};
+    size_t sz = 0;
+    int seq = 0;  // extra instructions merged into the current record so far
+    int nseq = 0; // number of records counted
+    int last = 0; // arch_build() result for the most recently examined instruction
+    if(!dyn->size) return 0;
+    for(int i=0; i<dyn->size; ++i) {
+        last = arch_build(dyn, i, &build);
+        if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<10)-1)) && i) {
+            // same state as the previous instruction and the 10-bit seq counter has room: extend the current record
+            ++seq;
+        } else {
+            seq = 0; // first instruction, state change, or seq saturated: count a new record
+            ++nseq;
+            memcpy(&previous, &build, sizeof(arch_build_t));
+            sz+=sizeof(arch_arch_t);
+            #define GO(A) if(build.A) sz+=sizeof(arch_##A##_t); // add each payload that is present
+            SUPER()
+            #undef GO
+        }
+    }
+    if(nseq==1 && !last)
+        return 0;   // a single record with empty state: no flags, no nothing
+    return sz;
+}
+
+static void build_next(arch_arch_t* arch, arch_build_t* build) // write one packed record at arch from the unpacked state in build
+{
+    #define GO(A) arch->A = build->A; // copy each presence bit into the header
+    SUPER()
+    #undef GO
+    arch->unaligned = build->unaligned;
+    arch->seq = 0; // a fresh record covers a single instruction
+    void* p = ((void*)arch)+sizeof(arch_arch_t); // payloads start right after the header (void* arithmetic is a GNU C extension)
+    #define GO(A)                                           \
+    if(arch->A) {                                           \
+        memcpy(p, &build->A##_, sizeof(arch_ ##A##_t));     \
+        p+=sizeof(arch_##A##_t);                            \
+    }
+    SUPER()
+    #undef GO
+}
+
+static int sizeof_arch(arch_arch_t* arch) // byte size of the record at arch: header plus every payload whose presence bit is set
+{
+    int sz = sizeof(arch_arch_t);
+    #define GO(A)   if(arch->A) sz+=sizeof(arch_##A##_t); // add the payload size for each set presence bit
+    SUPER()
+    #undef GO
+    return sz;
+}
+
+void populate_arch(dynarec_rv64_t* dyn, void* p) // write the packed records for every instruction of dyn at p; no bound is checked here (presumably p holds get_size_arch(dyn) bytes -- TODO confirm)
+{
+    arch_build_t build = {0};
+    arch_build_t previous = {0};
+    arch_arch_t* arch = p; // record currently being extended
+    arch_arch_t* next = p; // where the next new record will be written
+    int seq = 0;
+    for(int i=0; i<dyn->size; ++i) {
+        arch_build(dyn, i, &build);
+        if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<10)-1)) && i) {
+            // same state as the previous instruction and seq not saturated: extend the current record
+            seq++;
+            arch->seq = seq;
+        } else {
+            arch = next; // first instruction, state change, or seq saturated: start a new record
+            build_next(arch, &build);
+            seq = 0;
+            memcpy(&previous, &build, sizeof(arch_build_t));
+            int sz = sizeof_arch(arch);
+            next = (arch_arch_t*)((uintptr_t)arch+sz); // records are variable-sized and packed back to back
+        }
+    }
+}
+
+int getX64AddressInst(dynablock_t* db, uintptr_t x64pc); // defined outside this file (in signals.c, per the original note)
+
+void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t x64pc) // locate the arch record of the instruction before x64pc; the adjustment itself is still a todo
+{
+    if(!db->arch_size || !db->arch)
+        return; // the block carries no arch data
+    int ninst = getX64AddressInst(db, x64pc);
+    dynarec_log(LOG_INFO, "adjust_arch(...), db=%p, x64pc=%p, nints=%d", db, (void*)x64pc, ninst);
+    if(ninst<0) {
+        dynarec_log(LOG_INFO, "\n");
+        return;
+    }
+    if(ninst==0) {
+        dynarec_log(LOG_INFO, "\n");
+        CHECK_FLAGS(emu);
+        return;
+    }
+    // look for state at ninst-1
+    arch_arch_t* arch = db->arch;
+    arch_arch_t* next = arch;
+    #define GO(A) arch_##A##_t* A = NULL;
+    SUPER()
+    #undef GO
+    int i = 0; // number of instructions covered by the records walked so far
+    while(i<ninst) { // stop once coverage passes ninst-1; the former bound i<ninst-1 stopped one record early on a record boundary
+        arch = next;
+        i += 1+arch->seq; // each record covers 1+seq instructions
+        dynarec_log(LOG_INFO, "[ seq=%d%s%s%s%s%s ] ", arch->seq, arch->flags?" Flags":"", arch->x87?" x87":"", arch->mmx?" MMX":"", arch->sse?" SSE":"", arch->ymm?" YMM":"");
+        next = (arch_arch_t*)((uintptr_t)next + sizeof_arch(arch));
+    }
+    int sz = sizeof(arch_arch_t); // running offset of the next payload inside the selected record
+    #define GO(A)                                   \
+    if(arch->A) {                                   \
+        A = (arch_##A##_t*)((uintptr_t)arch + sz);  \
+        sz+=sizeof(arch_##A##_t);                   \
+    }
+    SUPER()
+    #undef GO
+    // todo: the payload pointers resolved above are not used yet
+    dynarec_log(LOG_INFO, "\n");
+}
+
+int arch_unaligned(dynablock_t* db, uintptr_t x64pc) // returns the recorded "unaligned" bit of the instruction at x64pc, or 0 when there is no data
+{
+    if(!db->arch_size || !db->arch)
+        return 0; // the block carries no arch data
+    int ninst = getX64AddressInst(db, x64pc);
+    if(ninst<0) {
+        return 0; // negative index: presumably x64pc was not found in the block -- TODO confirm
+    }
+    // look for state at ninst
+    arch_arch_t* arch = db->arch;
+    arch_arch_t* next = arch;
+    int i = -1; // index of the last instruction covered by the records walked so far
+    while(i<ninst) {
+        arch = next;
+        i += 1+arch->seq; // each record covers 1+seq instructions
+        next = (arch_arch_t*)((uintptr_t)next + sizeof_arch(arch));
+    }
+    return arch->unaligned;
+}
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_arch.h b/src/dynarec/rv64/dynarec_rv64_arch.h
new file mode 100644
index 00000000..2045da64
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_arch.h
@@ -0,0 +1,20 @@
+#ifndef __DYNAREC_RV_ARCH_H__
+#define __DYNAREC_RV_ARCH_H__
+
+#include <stddef.h>
+#include <ucontext.h>
+
+#include "x64emu.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "dynarec_rv64_private.h"
+
+// get the size in bytes of the arch specific info for dyn (can be 0 when there is nothing to store)
+size_t get_size_arch(dynarec_rv64_t* dyn);
+// populate the buffer at p with the arch specific records for dyn
+void populate_arch(dynarec_rv64_t* dyn, void* p);
+// adjust flags and more for the instruction located at x64pc in db
+void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t x64pc);
+// get whether the instruction at x64pc can be regenerated for unaligned access (0 when no info is available)
+int arch_unaligned(dynablock_t* db, uintptr_t x64pc);
+#endif // __DYNAREC_RV_ARCH_H__
diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c
index b4645b41..671ca9ed 100644
--- a/src/dynarec/rv64/dynarec_rv64_f30f.c
+++ b/src/dynarec/rv64/dynarec_rv64_f30f.c
@@ -398,8 +398,30 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("MOVDQU Ex,Gx");
             nextop = F8;
             GETGX();
-            GETEX(x2, 0, 8);
-            SSE_LOOP_MV_Q2(x3);
+            IF_UNALIGNED(ip) {
+                GETEX(x2, 0, 15);
+                LD(x3, gback, gdoffset + 0);
+                LD(x4, gback, gdoffset + 8);
+                for (int i = 0; i < 8; i++) {
+                    if (i == 0) {
+                        SB(x3, wback, fixedaddress);
+                    } else {
+                        SRLI(x5, x3, i * 8);
+                        SB(x5, wback, fixedaddress + i);
+                    }
+                }
+                for (int i = 0; i < 8; i++) {
+                    if (i == 0) {
+                        SB(x4, wback, fixedaddress + 8);
+                    } else {
+                        SRLI(x5, x4, i * 8);
+                        SB(x5, wback, fixedaddress + i + 8);
+                    }
+                }
+            } else {
+                GETEX(x2, 0, 8);
+                SSE_LOOP_MV_Q2(x3);
+            }
             if (!MODREG) SMWRITE2();
             break;
         case 0xAE:
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 332009d0..ab7a6cc6 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -814,6 +814,10 @@
 #define IFX2X(A, B) if ((dyn->insts[ninst].x64.gen_flags == (A) || dyn->insts[ninst].x64.gen_flags == (B) || dyn->insts[ninst].x64.gen_flags == ((A) | (B))))
 #define IFXN(A, B)  if ((dyn->insts[ninst].x64.gen_flags & (A) && !(dyn->insts[ninst].x64.gen_flags & (B))))
 
+#ifndef IF_UNALIGNED // default version, used unless IF_UNALIGNED was already defined before this point
+#define IF_UNALIGNED(A)    if(is_addr_unaligned(A)) // guarded branch is taken when is_addr_unaligned(A) is non-zero
+#endif
+
 #define STORE_REG(A) SD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 #define LOAD_REG(A)  LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 
diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h
index badb5c70..4a94b387 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass0.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass0.h
@@ -93,3 +93,6 @@
         else if (dyn->vector_sew == VECTOR_SEWNA && (set)) \
             dyn->vector_sew = VECTOR_SEW8;                 \
     } while (0)
+
+// pass0 version: stores 1 in insts[ninst].unaligned when address A is not already marked unaligned (0 when it is), and takes the guarded branch exactly when 1 was stored
+#define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=(is_addr_unaligned(A)?0:1)))
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index 1368b498..b39a589b 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -132,6 +132,7 @@ typedef struct instruction_rv64_s {
     uint8_t             nat_flags_carry:1;
     uint8_t             nat_flags_sign:1;
     uint8_t             nat_flags_needsign:1;
+    uint8_t             unaligned:1; // this opcode can be re-generated for unaligned special case
     uint8_t             nat_flags_op1;
     uint8_t             nat_flags_op2;
     flagcache_t         f_exit;     // flags status at end of instruction
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index 85a66590..10dbdb74 100644
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -1000,14 +1000,14 @@ int sigbus_specialcases(siginfo_t* info, void * ucntx, void* pc, void* _fpsimd,
 
     uint32_t funct3 = GET_FIELD(inst, 14, 12);
     uint32_t opcode = GET_FIELD(inst, 6, 0);
-    if ((opcode == 0b0100011 || opcode == 0b0100111 /* F */) && (funct3 == 0b010 /* (F)SW */ || funct3 == 0b011 /* (F)SD */)) {
+    if ((opcode == 0b0100011 || opcode == 0b0100111 /* F */) && (funct3 == 0b010 /* (F)SW */ || funct3 == 0b011 /* (F)SD */ || funct3 == 0b001 /* SH */)) {
         int val = (inst >> 20) & 0x1f;
         int dest = (inst >> 15) & 0x1f;
         int64_t imm = (GET_FIELD(inst, 31, 25) << 5) | (GET_FIELD(inst, 11, 7));
         imm = SIGN_EXT(imm, 12);
         volatile uint8_t *addr = (void *)(p->uc_mcontext.__gregs[dest] + imm);
         uint64_t value = opcode == 0b0100011 ? p->uc_mcontext.__gregs[val] : p->uc_mcontext.__fpregs.__d.__f[val<<1];
-        for(int i = 0; i < (funct3 == 0b010 ? 4 : 8); ++i) {
+        for(int i = 0; i < (funct3 == 0b010 ? 4 : funct3 == 0b011 ? 8 : 2); ++i) {
             addr[i] = (value >> (i * 8)) & 0xff;
         }
         p->uc_mcontext.__gregs[0] += 4; // pc += 4