diff options
| author | xctan <xctan@cirno.icu> | 2025-01-24 15:10:45 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2025-01-24 08:10:45 +0100 |
| commit | 8c1ffca530fb471984cd91347d93457575c7ee5d (patch) | |
| tree | 66508b2cf993b209f5f54a2e146ba66257528e42 /src | |
| parent | 7099774a3c82642ad64bc3a4f1b798e951904f7d (diff) | |
| download | box64-8c1ffca530fb471984cd91347d93457575c7ee5d.tar.gz box64-8c1ffca530fb471984cd91347d93457575c7ee5d.zip | |
[RV64_DYNAREC] Added codegen for unaligned stores (#2289)
* [RV64_DYNAREC] Added another special SIGBUS case
* [RV64_DYNAREC] Added codegen for unaligned stores
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/dynarec_arch.h | 7 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_2.c | 16 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00_3.c | 34 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_0f.c | 32 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_arch.c | 231 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_arch.h | 20 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_f30f.c | 26 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 4 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_pass0.h | 3 | ||||
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_private.h | 1 | ||||
| -rw-r--r-- | src/libtools/signals.c | 4 |
11 files changed, 356 insertions, 22 deletions
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index 85ec3ef4..a1af30c4 100644 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -74,6 +74,7 @@ #include "rv64/rv64_printer.h" #include "rv64/dynarec_rv64_private.h" #include "rv64/dynarec_rv64_functions.h" +#include "rv64/dynarec_rv64_arch.h" // Limit here is unconditionnal jump, that is signed 21bits #define MAXBLOCK_SIZE ((1<<20)-200) @@ -81,11 +82,11 @@ #define UPDATE_SPECIFICS(A) #define PREUPDATE_SPECIFICS(A) updateNativeFlags(A) -#define ARCH_SIZE(A) 0 -#define ARCH_FILL(A, B) {} +#define ARCH_SIZE(A) get_size_arch(A) +#define ARCH_FILL(A, B) populate_arch(A, B) #define ARCH_ADJUST(A, B, C, D) {} #define STOP_NATIVE_FLAGS(A, B) {} -#define ARCH_UNALIGNED(A, B) 0 +#define ARCH_UNALIGNED(A, B) arch_unaligned(A, B) #else #error Unsupported platform #endif diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c index 4f994174..29898876 100644 --- a/src/dynarec/rv64/dynarec_rv64_00_2.c +++ b/src/dynarec/rv64/dynarec_rv64_00_2.c @@ -404,8 +404,20 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int if (MODREG) { // reg <= reg MVxw(TO_NAT((nextop & 7) + (rex.b << 3)), gd); } else { // mem <= reg - addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); - SDxw(gd, ed, fixedaddress); + IF_UNALIGNED(ip) { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, (1 << (2 + rex.w)) - 1, 0); + for (int i = 0; i < (1 << (2 + rex.w)); i++) { + if (i == 0) { + SB(gd, ed, fixedaddress); + } else { + SRLI(x3, gd, i * 8); + SB(x3, ed, fixedaddress + i); + } + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); + SDxw(gd, ed, fixedaddress); + } SMWRITELOCK(lock); } break; diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c index 072ed33e..1a5d4fd7 100644 --- 
a/src/dynarec/rv64/dynarec_rv64_00_3.c +++ b/src/dynarec/rv64/dynarec_rv64_00_3.c @@ -392,14 +392,32 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ed = TO_NAT((nextop & 7) + (rex.b << 3)); MOV64xw(ed, i64); } else { // mem <= i32 - addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 1, 4); - i64 = F32S; - if (i64) { - MOV64x(x3, i64); - ed = x3; - } else - ed = xZR; - SDxw(ed, wback, fixedaddress); + IF_UNALIGNED(ip) { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, (1 << (2 + rex.w)) - 1, 4); + i64 = F32S; + if (i64) { + MOV64x(x4, i64); + ed = x4; + } else + ed = xZR; + for (int i = 0; i < (1 << (2 + rex.w)); i++) { + if (i == 0 || ed == xZR) { + SB(ed, wback, fixedaddress + i); + } else { + SRLI(x3, ed, i * 8); + SB(x3, wback, fixedaddress + i); + } + } + } else { + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 1, 4); + i64 = F32S; + if (i64) { + MOV64x(x3, i64); + ed = x3; + } else + ed = xZR; + SDxw(ed, wback, fixedaddress); + } SMWRITELOCK(lock); } break; diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c index fd22aad4..d594a718 100644 --- a/src/dynarec/rv64/dynarec_rv64_0f.c +++ b/src/dynarec/rv64/dynarec_rv64_0f.c @@ -180,11 +180,33 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni INST_NAME("MOVUPS Ex,Gx"); nextop = F8; GETGX(); - GETEX(x2, 0, 8); - LD(x3, gback, gdoffset + 0); - LD(x4, gback, gdoffset + 8); - SD(x3, wback, fixedaddress + 0); - SD(x4, wback, fixedaddress + 8); + IF_UNALIGNED(ip) { + GETEX(x2, 0, 15); + LD(x3, gback, gdoffset + 0); + LD(x4, gback, gdoffset + 8); + for (int i = 0; i < 8; i++) { + if (i == 0) { + SB(x3, wback, fixedaddress); + } else { + SRLI(x5, x3, i * 8); + SB(x5, wback, fixedaddress + i); + } + } + for (int i = 0; i < 8; i++) { + if (i == 0) { + SB(x4, wback, fixedaddress + 8); + } else { + SRLI(x5, x4, i * 8); + 
SB(x5, wback, fixedaddress + i + 8); + } + } + } else { + GETEX(x2, 0, 8); + LD(x3, gback, gdoffset + 0); + LD(x4, gback, gdoffset + 8); + SD(x3, wback, fixedaddress + 0); + SD(x4, wback, fixedaddress + 8); + } if (!MODREG) SMWRITE2(); break; diff --git a/src/dynarec/rv64/dynarec_rv64_arch.c b/src/dynarec/rv64/dynarec_rv64_arch.c new file mode 100644 index 00000000..25555c1f --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_arch.c @@ -0,0 +1,231 @@ +#include <stddef.h> +#include <stdio.h> +#include <signal.h> +#include <ucontext.h> +#include <string.h> + +#include "debug.h" +#include "dynablock.h" +#include "x64emu.h" +#include "emu/x64emu_private.h" +#include "x64run.h" +#include "emu/x64run_private.h" +#include "dynarec/dynablock_private.h" +#include "dynarec_rv64_arch.h" +#include "dynarec_rv64_functions.h" +#include "dynarec_rv64_private.h" + +//order might be important, so define SUPER for the right one +#define SUPER() \ + GO(flags) \ + GO(x87) \ + GO(mmx) \ + GO(sse) \ + GO(ymm) \ + + +typedef struct arch_flags_s +{ + uint8_t ignore:1; +} arch_flags_t; + +#define X87_ST_D 0 +#define X87_ST_F 1 +#define X87_ST_I64 2 +#define XMM0 0 +#define X870 XMM0 + 16 +#define EMM0 XMM0 + 16 +typedef struct arch_x87_s +{ + int8_t delta; //up to +/-7 + uint8_t x87; // 1bit is STx present + uint16_t x87_type; // 2bits per STx type + uint32_t x87_pos; //4bits per STx position (well, 3 would be enough) +} arch_x87_t; + +typedef struct arch_mmx_s +{ + uint8_t mmx; //1bit for each mmx reg present +} arch_mmx_t; + +typedef struct arch_sse_s +{ + uint16_t sse; //1bit for each sse reg present +} arch_sse_t; + +typedef struct arch_ymm_s +{ + uint16_t ymm0; // 1bit for ymm0 + uint16_t ymm; // 1bit for each ymm present + uint64_t ymm_pos; // 4bits for position of each ymm present +} arch_ymm_t; + +typedef struct arch_arch_s +{ + #define GO(A) uint16_t A:1; + SUPER() + #undef GO + uint16_t unaligned:1; + uint16_t seq:10; // how many instruction on the same values +} arch_arch_t; + 
+typedef struct arch_build_s +{ + #define GO(A) uint8_t A:1; + SUPER() + #undef GO + uint8_t unaligned; + #define GO(A) arch_##A##_t A##_; + SUPER() + #undef GO +} arch_build_t; + +static int arch_build(dynarec_rv64_t* dyn, int ninst, arch_build_t* arch) +{ + memset(arch, 0, sizeof(arch_build_t)); + // todo + // opcode can handle unaligned + arch->unaligned = dyn->insts[ninst].unaligned; + return arch->flags + arch->x87 + arch->mmx + arch->sse + arch->ymm + arch->unaligned; +} + +size_t get_size_arch(dynarec_rv64_t* dyn) +{ + arch_build_t build = {0}; + arch_build_t previous = {0}; + size_t sz = 0; + int seq = 0; + int nseq = 0; + int last = 0; + if(!dyn->size) return 0; + for(int i=0; i<dyn->size; ++i) { + last = arch_build(dyn, i, &build); + if((!memcmp(&build, &previous, sizeof(arch_build_t))) && (seq<((1<<10)-1)) && i) { + // same sequence, increment + ++seq; + } else { + seq = 0; + ++nseq; + memcpy(&previous, &build, sizeof(arch_build_t)); + sz+=sizeof(arch_arch_t); + #define GO(A) if(build.A) sz+=sizeof(arch_##A##_t); + SUPER() + #undef GO + } + } + if(nseq==1 && !last) + return 0; //empty, no flags, no nothing + return sz; +} + +static void build_next(arch_arch_t* arch, arch_build_t* build) +{ + #define GO(A) arch->A = build->A; + SUPER() + #undef GO + arch->unaligned = build->unaligned; + arch->seq = 0; + void* p = ((void*)arch)+sizeof(arch_arch_t); + #define GO(A) \ + if(arch->A) { \ + memcpy(p, &build->A##_, sizeof(arch_ ##A##_t)); \ + p+=sizeof(arch_##A##_t); \ + } + SUPER() + #undef GO +} + +static int sizeof_arch(arch_arch_t* arch) +{ + int sz = sizeof(arch_arch_t); + #define GO(A) if(arch->A) sz+=sizeof(arch_##A##_t); + SUPER() + #undef GO + return sz; +} + +void populate_arch(dynarec_rv64_t* dyn, void* p) +{ + arch_build_t build = {0}; + arch_build_t previous = {0}; + arch_arch_t* arch = p; + arch_arch_t* next = p; + int seq = 0; + for(int i=0; i<dyn->size; ++i) { + arch_build(dyn, i, &build); + if((!memcmp(&build, &previous, sizeof(arch_build_t))) 
&& (seq<((1<<10)-1)) && i) { + // same sequence, increment + seq++; + arch->seq = seq; + } else { + arch = next; + build_next(arch, &build); + seq = 0; + memcpy(&previous, &build, sizeof(arch_build_t)); + int sz = sizeof_arch(arch); + next = (arch_arch_t*)((uintptr_t)arch+sz); + } + } +} + +int getX64AddressInst(dynablock_t* db, uintptr_t x64pc); // define is signal.c + +void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t x64pc) +{ + if(!db->arch_size || !db->arch) + return; + int ninst = getX64AddressInst(db, x64pc); + dynarec_log(LOG_INFO, "adjust_arch(...), db=%p, x64pc=%p, nints=%d", db, (void*)x64pc, ninst); + if(ninst<0) { + dynarec_log(LOG_INFO, "\n"); + return; + } + if(ninst==0) { + dynarec_log(LOG_INFO, "\n"); + CHECK_FLAGS(emu); + return; + } + // look for state at ninst-1 + arch_arch_t* arch = db->arch; + arch_arch_t* next = arch; + #define GO(A) arch_##A##_t* A = NULL; + SUPER() + #undef GO + int i = 0; + while(i<ninst-1) { + arch = next; + i += 1+arch->seq; + dynarec_log(LOG_INFO, "[ seq=%d%s%s%s%s%s ] ", arch->seq, arch->flags?" Flags":"", arch->x87?" x87":"", arch->mmx?" MMX":"", arch->sse?" SSE":"", arch->ymm?" 
YMM":""); + next = (arch_arch_t*)((uintptr_t)next + sizeof_arch(arch)); + } + int sz = sizeof(arch_arch_t); + #define GO(A) \ + if(arch->A) { \ + A = (arch_##A##_t*)((uintptr_t)arch + sz); \ + sz+=sizeof(arch_##A##_t); \ + } + SUPER() + #undef GO + // todo + dynarec_log(LOG_INFO, "\n"); +} + +int arch_unaligned(dynablock_t* db, uintptr_t x64pc) +{ + if(!db->arch_size || !db->arch) + return 0; + int ninst = getX64AddressInst(db, x64pc); + if(ninst<0) { + return 0; + } + // look for state at ninst + arch_arch_t* arch = db->arch; + arch_arch_t* next = arch; + int i = -1; + while(i<ninst) { + arch = next; + i += 1+arch->seq; + next = (arch_arch_t*)((uintptr_t)next + sizeof_arch(arch)); + } + return arch->unaligned; +} \ No newline at end of file diff --git a/src/dynarec/rv64/dynarec_rv64_arch.h b/src/dynarec/rv64/dynarec_rv64_arch.h new file mode 100644 index 00000000..2045da64 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_arch.h @@ -0,0 +1,20 @@ +#ifndef __DYNAREC_RV_ARCH_H__ +#define __DYNAREC_RV_ARCH_H__ + +#include <stddef.h> +#include <ucontext.h> + +#include "x64emu.h" +#include "box64context.h" +#include "dynarec.h" +#include "dynarec_rv64_private.h" + +// get size of arch specific info (can be 0) +size_t get_size_arch(dynarec_rv64_t* dyn); +//populate the array +void populate_arch(dynarec_rv64_t* dyn, void* p); +//adjust flags and more +void adjust_arch(dynablock_t* db, x64emu_t* emu, ucontext_t* p, uintptr_t x64pc); +// get if instruction can be regenerated for unaligned access +int arch_unaligned(dynablock_t* db, uintptr_t x64pc); +#endif // __DYNAREC_RV_ARCH_H__ diff --git a/src/dynarec/rv64/dynarec_rv64_f30f.c b/src/dynarec/rv64/dynarec_rv64_f30f.c index b4645b41..671ca9ed 100644 --- a/src/dynarec/rv64/dynarec_rv64_f30f.c +++ b/src/dynarec/rv64/dynarec_rv64_f30f.c @@ -398,8 +398,30 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int INST_NAME("MOVDQU Ex,Gx"); nextop = F8; GETGX(); - GETEX(x2, 0, 8); - SSE_LOOP_MV_Q2(x3); + 
IF_UNALIGNED(ip) { + GETEX(x2, 0, 15); + LD(x3, gback, gdoffset + 0); + LD(x4, gback, gdoffset + 8); + for (int i = 0; i < 8; i++) { + if (i == 0) { + SB(x3, wback, fixedaddress); + } else { + SRLI(x5, x3, i * 8); + SB(x5, wback, fixedaddress + i); + } + } + for (int i = 0; i < 8; i++) { + if (i == 0) { + SB(x4, wback, fixedaddress + 8); + } else { + SRLI(x5, x4, i * 8); + SB(x5, wback, fixedaddress + i + 8); + } + } + } else { + GETEX(x2, 0, 8); + SSE_LOOP_MV_Q2(x3); + } if (!MODREG) SMWRITE2(); break; case 0xAE: diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h index 332009d0..ab7a6cc6 100644 --- a/src/dynarec/rv64/dynarec_rv64_helper.h +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -814,6 +814,10 @@ #define IFX2X(A, B) if ((dyn->insts[ninst].x64.gen_flags == (A) || dyn->insts[ninst].x64.gen_flags == (B) || dyn->insts[ninst].x64.gen_flags == ((A) | (B)))) #define IFXN(A, B) if ((dyn->insts[ninst].x64.gen_flags & (A) && !(dyn->insts[ninst].x64.gen_flags & (B)))) +#ifndef IF_UNALIGNED +#define IF_UNALIGNED(A) if(is_addr_unaligned(A)) +#endif + #define STORE_REG(A) SD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) #define LOAD_REG(A) LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A])) diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h index badb5c70..4a94b387 100644 --- a/src/dynarec/rv64/dynarec_rv64_pass0.h +++ b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -93,3 +93,6 @@ else if (dyn->vector_sew == VECTOR_SEWNA && (set)) \ dyn->vector_sew = VECTOR_SEW8; \ } while (0) + +// mark opcode as "unaligned" possible only if the current address is not marked as already unaligned +#define IF_UNALIGNED(A) if((dyn->insts[ninst].unaligned=(is_addr_unaligned(A)?0:1))) \ No newline at end of file diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h index 1368b498..b39a589b 100644 --- a/src/dynarec/rv64/dynarec_rv64_private.h +++ 
b/src/dynarec/rv64/dynarec_rv64_private.h @@ -132,6 +132,7 @@ typedef struct instruction_rv64_s { uint8_t nat_flags_carry:1; uint8_t nat_flags_sign:1; uint8_t nat_flags_needsign:1; + uint8_t unaligned:1; // this opcode can be re-generated for unaligned special case uint8_t nat_flags_op1; uint8_t nat_flags_op2; flagcache_t f_exit; // flags status at end of instruction diff --git a/src/libtools/signals.c b/src/libtools/signals.c index 85a66590..10dbdb74 100644 --- a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -1000,14 +1000,14 @@ int sigbus_specialcases(siginfo_t* info, void * ucntx, void* pc, void* _fpsimd, uint32_t funct3 = GET_FIELD(inst, 14, 12); uint32_t opcode = GET_FIELD(inst, 6, 0); - if ((opcode == 0b0100011 || opcode == 0b0100111 /* F */) && (funct3 == 0b010 /* (F)SW */ || funct3 == 0b011 /* (F)SD */)) { + if ((opcode == 0b0100011 || opcode == 0b0100111 /* F */) && (funct3 == 0b010 /* (F)SW */ || funct3 == 0b011 /* (F)SD */ || funct3 == 0b001 /* SH */)) { int val = (inst >> 20) & 0x1f; int dest = (inst >> 15) & 0x1f; int64_t imm = (GET_FIELD(inst, 31, 25) << 5) | (GET_FIELD(inst, 11, 7)); imm = SIGN_EXT(imm, 12); volatile uint8_t *addr = (void *)(p->uc_mcontext.__gregs[dest] + imm); uint64_t value = opcode == 0b0100011 ? p->uc_mcontext.__gregs[val] : p->uc_mcontext.__fpregs.__d.__f[val<<1]; - for(int i = 0; i < (funct3 == 0b010 ? 4 : 8); ++i) { + for(int i = 0; i < (funct3 == 0b010 ? 4 : funct3 == 0b011 ? 8 : 2); ++i) { addr[i] = (value >> (i * 8)) & 0xff; } p->uc_mcontext.__gregs[0] += 4; // pc += 4 |