diff options
Diffstat (limited to 'src')
33 files changed, 3624 insertions, 54 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c index aa09934c..4d5d875f 100755 --- a/src/dynarec/arm64/dynarec_arm64_helper.c +++ b/src/dynarec/arm64/dynarec_arm64_helper.c @@ -1753,3 +1753,65 @@ void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4) MVNw_REG(s4, s4); BFIw(xFlags, s4, F_PF, 1); } + + +void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n) +{ + MESSAGE(LOG_DEBUG, "Reset Caches with %d\n",reset_n); + #if STEP > 1 + // for STEP 2 & 3, just need to refrest with current, and undo the changes (push & swap) + dyn->n = dyn->insts[ninst].n; + neoncacheUnwind(&dyn->n); + #ifdef HAVE_TRACE + if(box64_dynarec_dump) + if(memcmp(&dyn->n, &dyn->insts[reset_n].n, sizeof(neon_cache_t))) { + MESSAGE(LOG_DEBUG, "Warning, difference in neoncache: reset="); + for(int i=0; i<24; ++i) + if(dyn->insts[reset_n].n.neoncache[i].v) + MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->insts[reset_n].n.neoncache[i].t, dyn->insts[reset_n].n.neoncache[i].n)); + if(dyn->insts[reset_n].n.combined1 || dyn->insts[reset_n].n.combined2) + MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->insts[reset_n].n.swapped?"SWP":"CMB", dyn->insts[reset_n].n.combined1, dyn->insts[reset_n].n.combined2); + if(dyn->insts[reset_n].n.stack_push || dyn->insts[reset_n].n.stack_pop) + MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->insts[reset_n].n.stack_push, -dyn->insts[reset_n].n.stack_pop); + MESSAGE(LOG_DEBUG, " ==> "); + for(int i=0; i<24; ++i) + if(dyn->insts[ninst].n.neoncache[i].v) + MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->insts[ninst].n.neoncache[i].t, dyn->insts[ninst].n.neoncache[i].n)); + if(dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2) + MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->insts[ninst].n.swapped?"SWP":"CMB", dyn->insts[ninst].n.combined1, dyn->insts[ninst].n.combined2); + if(dyn->insts[ninst].n.stack_push || dyn->insts[ninst].n.stack_pop) + MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->insts[ninst].n.stack_push, 
-dyn->insts[ninst].n.stack_pop); + MESSAGE(LOG_DEBUG, " -> "); + for(int i=0; i<24; ++i) + if(dyn->n.neoncache[i].v) + MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->n.neoncache[i].t, dyn->n.neoncache[i].n)); + if(dyn->n.combined1 || dyn->n.combined2) + MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->n.swapped?"SWP":"CMB", dyn->n.combined1, dyn->n.combined2); + if(dyn->n.stack_push || dyn->n.stack_pop) + MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->n.stack_push, -dyn->n.stack_pop); + MESSAGE(LOG_DEBUG, "\n"); + } + #endif //HAVE_TRACE + #else + dyn->n = dyn->insts[reset_n].n; + #endif +} + +// propagate ST stack state, especial stack pop that are defered +void fpu_propagate_stack(dynarec_arm_t* dyn, int ninst) +{ + if(dyn->n.stack_pop) { + for(int j=0; j<24; ++j) + if((dyn->n.neoncache[j].t == NEON_CACHE_ST_D || dyn->n.neoncache[j].t == NEON_CACHE_ST_F)) { + if(dyn->n.neoncache[j].n<dyn->n.stack_pop) + dyn->n.neoncache[j].v = 0; + else + dyn->n.neoncache[j].n-=dyn->n.stack_pop; + } + dyn->n.stack_pop = 0; + } + dyn->n.stack = dyn->n.stack_next; + dyn->n.news = 0; + dyn->n.stack_push = 0; + dyn->n.swapped = 0; +} \ No newline at end of file diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h index f13331ac..ba830b21 100755 --- a/src/dynarec/arm64/dynarec_arm64_helper.h +++ b/src/dynarec/arm64/dynarec_arm64_helper.h @@ -969,6 +969,8 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr); #define fpu_pushcache STEPNAME(fpu_pushcache) #define fpu_popcache STEPNAME(fpu_popcache) #define fpu_reset STEPNAME(fpu_reset) +#define fpu_reset_cache STEPNAME(fpu_reset_cache) +#define fpu_propagate_stack STEPNAME(fpu_propagate_stack) #define fpu_purgecache STEPNAME(fpu_purgecache) #define mmx_purgecache STEPNAME(mmx_purgecache) #define x87_purgecache STEPNAME(x87_purgecache) @@ -1154,6 +1156,10 @@ void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1); // common coproc helpers // reset the cache void fpu_reset(dynarec_arm_t* dyn); +// reset the 
cache with n +void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n); +// propagate stack state +void fpu_propagate_stack(dynarec_arm_t* dyn, int ninst); // purge the FPU cache (needs 3 scratch registers) void fpu_purgecache(dynarec_arm_t* dyn, int ninst, int next, int s1, int s2, int s3); // purge MMX cache diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h index e5e98658..d1d30599 100755 --- a/src/dynarec/dynarec_arch.h +++ b/src/dynarec/dynarec_arch.h @@ -18,6 +18,15 @@ #define dynarec_native_t dynarec_la464_t +#elif defined(RV64) +#include "rv64/rv64_printer.h" +#include "rv64/dynarec_rv64_private.h" +#include "rv64/dynarec_rv64_functions.h" + +#define instruction_native_t instruction_rv64_t +#define dynarec_native_t dynarec_rv64_t + + #else #error Unsupported platform #endif diff --git a/src/dynarec/dynarec_helper.h b/src/dynarec/dynarec_helper.h index 19f48dcb..2f4b82fa 100755 --- a/src/dynarec/dynarec_helper.h +++ b/src/dynarec/dynarec_helper.h @@ -5,6 +5,8 @@ #include "arm64/dynarec_arm64_helper.h" #elif defined(LA464) #include "la464/dynarec_la464_helper.h" +#elif defined(RV64) +#include "rv64/dynarec_rv64_helper.h" #else #error Unsupported architecture #endif diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c index c3f01ff2..06c4878c 100755 --- a/src/dynarec/dynarec_native.c +++ b/src/dynarec/dynarec_native.c @@ -325,7 +325,7 @@ static void fillPredecessors(dynarec_native_t* dyn) } // updateNeed goes backward, from last intruction to top -static int updateNeed(dynarec_arm_t* dyn, int ninst, uint8_t need) { +static int updateNeed(dynarec_native_t* dyn, int ninst, uint8_t need) { while (ninst>=0) { // need pending but instruction is only a subset: remove pend and use an X_ALL instead need |= dyn->insts[ninst].x64.need_after; diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c index ca8da160..14f85b40 100755 --- a/src/dynarec/dynarec_native_pass.c +++ 
b/src/dynarec/dynarec_native_pass.c @@ -60,44 +60,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr) dyn->f.pending = 0; fpu_reset(dyn); } else { - MESSAGE(LOG_DEBUG, "Reset Caches with %d\n",reset_n); - #if STEP > 1 - // for STEP 2 & 3, just need to refrest with current, and undo the changes (push & swap) - dyn->n = dyn->insts[ninst].n; - neoncacheUnwind(&dyn->n); - #ifdef HAVE_TRACE - if(box64_dynarec_dump) - if(memcmp(&dyn->n, &dyn->insts[reset_n].n, sizeof(neon_cache_t))) { - MESSAGE(LOG_DEBUG, "Warning, difference in neoncache: reset="); - for(int i=0; i<24; ++i) - if(dyn->insts[reset_n].n.neoncache[i].v) - MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->insts[reset_n].n.neoncache[i].t, dyn->insts[reset_n].n.neoncache[i].n)); - if(dyn->insts[reset_n].n.combined1 || dyn->insts[reset_n].n.combined2) - MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->insts[reset_n].n.swapped?"SWP":"CMB", dyn->insts[reset_n].n.combined1, dyn->insts[reset_n].n.combined2); - if(dyn->insts[reset_n].n.stack_push || dyn->insts[reset_n].n.stack_pop) - MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->insts[reset_n].n.stack_push, -dyn->insts[reset_n].n.stack_pop); - MESSAGE(LOG_DEBUG, " ==> "); - for(int i=0; i<24; ++i) - if(dyn->insts[ninst].n.neoncache[i].v) - MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->insts[ninst].n.neoncache[i].t, dyn->insts[ninst].n.neoncache[i].n)); - if(dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2) - MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->insts[ninst].n.swapped?"SWP":"CMB", dyn->insts[ninst].n.combined1, dyn->insts[ninst].n.combined2); - if(dyn->insts[ninst].n.stack_push || dyn->insts[ninst].n.stack_pop) - MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->insts[ninst].n.stack_push, -dyn->insts[ninst].n.stack_pop); - MESSAGE(LOG_DEBUG, " -> "); - for(int i=0; i<24; ++i) - if(dyn->n.neoncache[i].v) - MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->n.neoncache[i].t, dyn->n.neoncache[i].n)); - if(dyn->n.combined1 || dyn->n.combined2) - 
MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->n.swapped?"SWP":"CMB", dyn->n.combined1, dyn->n.combined2); - if(dyn->n.stack_push || dyn->n.stack_pop) - MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->n.stack_push, -dyn->n.stack_pop); - MESSAGE(LOG_DEBUG, "\n"); - } - #endif //HAVE_TRACE - #else - dyn->n = dyn->insts[reset_n].n; - #endif + fpu_reset_cache(dyn, ninst, reset_n); dyn->f = dyn->insts[reset_n].f_exit; if(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT) { MESSAGE(LOG_DEBUG, "Apply Barrier Float\n"); @@ -112,21 +75,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr) reset_n = -1; } else if(ninst && (dyn->insts[ninst].pred_sz!=1 || dyn->insts[ninst].pred[0]!=ninst-1)) dyn->last_ip = 0; // reset IP if some jump are comming here - // propagate ST stack state, especial stack pop that are defered - if(dyn->n.stack_pop) { - for(int j=0; j<24; ++j) - if((dyn->n.neoncache[j].t == NEON_CACHE_ST_D || dyn->n.neoncache[j].t == NEON_CACHE_ST_F)) { - if(dyn->n.neoncache[j].n<dyn->n.stack_pop) - dyn->n.neoncache[j].v = 0; - else - dyn->n.neoncache[j].n-=dyn->n.stack_pop; - } - dyn->n.stack_pop = 0; - } - dyn->n.stack = dyn->n.stack_next; - dyn->n.news = 0; - dyn->n.stack_push = 0; - dyn->n.swapped = 0; + fpu_propagate_stack(dyn, ninst); NEW_INST; if(dyn->insts[ninst].pred_sz>1) {SMSTART();} fpu_reset_scratch(dyn); diff --git a/src/dynarec/dynarec_next.h b/src/dynarec/dynarec_next.h index 19d426b8..0ad6c18d 100644 --- a/src/dynarec/dynarec_next.h +++ b/src/dynarec/dynarec_next.h @@ -15,6 +15,13 @@ void la464_epilog() EXPORTDYN; #define native_next la464_next #define native_prolog la464_prolog #define native_epilog la464_epilog +#elif defined(RV64) +void rv64_next(void) EXPORTDYN; +void rv64_prolog(x64emu_t* emu, void* addr) EXPORTDYN; +void rv64_epilog() EXPORTDYN; +#define native_next rv64_next +#define native_prolog rv64_prolog +#define native_epilog rv64_epilog #else #error Unsupported architecture #endif diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h 
index 32cf56fa..2134673e 100755 --- a/src/dynarec/native_lock.h +++ b/src/dynarec/native_lock.h @@ -27,6 +27,46 @@ #define native_lock_decifnot0(A) arm64_lock_decifnot0(A) #define native_lock_store(A, B) arm64_lock_store(A, B) +#elif defined(RV64) +#include "rv64/rv64_lock.h" + +#define USE_CAS +// RV64 is quite strict (or at least strongly recommand) on what you can do between an LD.A and an SD.A +// That basicaly forbid to call a function, so there cannot be READ / WRITE separated +// And so need to use a Compare and Swap mecanism instead + +// no byte or 2-bytes atomic access on RISC-V +#define native_lock_xchg(A, B) rv64_lock_xchg(A, B) +#define native_lock_xchg_d(A, B) rv64_lock_xchg_d(A, B) +#define native_lock_storeifref(A, B, C) rv64_lock_storeifref(A, B, C) +#define native_lock_storeifref_d(A, B, C) rv64_lock_storeifref_d(A, B, C) +#define native_lock_storeifref2_d(A, B, C) rv64_lock_storeifref2_d(A, B, C) +#define native_lock_storeifnull(A, B) rv64_lock_storeifnull(A, B) +#define native_lock_storeifnull_d(A, B) rv64_lock_storeifnull_d(A, B) +#define native_lock_decifnot0b(A) rv64_lock_decifnot0b(A) +#define native_lock_storeb(A, B) rv64_lock_storeb(A, B) +#define native_lock_incif0(A) rv64_lock_incif0(A) +#define native_lock_decifnot0(A) rv64_lock_decifnot0(A) +#define native_lock_store(A, B) rv64_lock_store(A, B) +#define native_lock_cas_d(A, B, C) rv64_lock_cas_d(A, B, C) +#define native_lock_cas_dd(A, B, C) rv64_lock_cas_dd(A, B, C) + +#define native_lock_xchg_b(A, B) rv64_lock_xchg_b(A, B) +#define native_lock_cas_b(A, B, C) rv64_lock_cas_b(A, B, C) +#define native_lock_cas_h(A, B, C) rv64_lock_cas_h(A, B, C) + +#define native_lock_read_b(A) tmpcas=*(uint8_t*)(A) +#define native_lock_write_b(A, B) rv64_lock_cas_b(A, tmpcas, B) +#define native_lock_read_h(A) tmpcas=*(uint16_t*)(A) +#define native_lock_write_h(A, B) rv64_lock_cas_h(A, tmpcas, B) +#define native_lock_read_d(A) tmpcas=*(uint32_t*)(A) +#define native_lock_write_d(A, B) rv64_lock_cas_d(A, 
tmpcas, B) +#define native_lock_read_dd(A) tmpcas=*(uint64_t*)(A) +#define native_lock_write_dd(A, B) rv64_lock_cas_dd(A, tmpcas, B) +// there is no atomic move on 16bytes, so faking it +#define native_lock_read_dq(A, B, C) *A=tmpcas=((uint64_t*)(C))[0]; *B=((uint64_t*)(C))[1]; +#define native_lock_write_dq(A, B, C) rv64_lock_cas_dq(C, A, tmpcas, B); + #else #error Unsupported architecture #endif diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c new file mode 100644 index 00000000..8fa698a0 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_00.c @@ -0,0 +1,121 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <pthread.h> +#include <errno.h> +#include <signal.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "bridge.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" +#include "custommem.h" + +#include "rv64_printer.h" +#include "dynarec_rv64_private.h" +#include "dynarec_rv64_functions.h" +#include "dynarec_rv64_helper.h" + +int isSimpleWrapper(wrapper_t fun); + +uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog) +{ + uint8_t nextop, opcode; + uint8_t gd, ed; + int8_t i8; + int32_t i32, tmp; + int64_t i64, j64; + uint8_t u8; + uint8_t gb1, gb2, eb1, eb2; + uint32_t u32; + uint64_t u64; + uint8_t wback, wb1, wb2, wb; + int64_t fixedaddress; + int lock; + int cacheupd = 0; + + opcode = F8; + MAYUSE(eb1); + MAYUSE(eb2); + MAYUSE(j64); + MAYUSE(wb); + MAYUSE(lock); + MAYUSE(cacheupd); + + switch(opcode) { + + case 0x50: + case 0x51: + case 0x52: + case 0x53: + case 0x54: + case 0x55: + case 0x56: + case 0x57: + INST_NAME("PUSH reg"); + gd = xRAX+(opcode&0x07)+(rex.b<<3); + SD(gd, xRSP, -8); + SUBI(xRSP, xRSP, 
8); + break; + + case 0x89: + INST_NAME("MOV Ed, Gd"); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg + MVxw(xRAX+(nextop&7)+(rex.b<<3), gd); + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0); + SDxw(gd, ed, fixedaddress); + SMWRITELOCK(lock); + } + break; + + case 0x8D: + INST_NAME("LEA Gd, Ed"); + nextop=F8; + GETGD; + if(MODREG) { // reg <= reg? that's an invalid operation + DEFAULT; + } else { // mem <= reg + addr = geted(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0); + if(gd!=ed) { // it's sometimes used as a 3 bytes NOP + MV(gd, ed); + } + else if(!rex.w) { + ZEROUP(gd); //truncate the higher 32bits as asked + } + } + break; + + case 0x58: + case 0x59: + case 0x5A: + case 0x5B: + case 0x5C: + case 0x5D: + case 0x5E: + case 0x5F: + INST_NAME("POP reg"); + gd = xRAX+(opcode&0x07)+(rex.b<<3); + LD(gd, xRSP, 0); + if(gd!=xRSP) { + ADDI(xRSP, xRSP, 8); + } + break; + + default: + DEFAULT; + } + + return addr; +} diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c new file mode 100644 index 00000000..b26e1175 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_functions.c @@ -0,0 +1,207 @@ +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> +#include <errno.h> +#include <string.h> +#include <math.h> +#include <signal.h> +#include <sys/types.h> +#include <unistd.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "tools/bridge_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "emu/x87emu_private.h" +#include "x64trace.h" +#include "signals.h" +#include "dynarec_rv64.h" +#include "dynarec_rv64_private.h" +#include "dynarec_rv64_functions.h" +#include "custommem.h" +#include "bridge.h" +#include "rv64_lock.h" + +void 
fpu_reset_scratch(dynarec_rv64_t* dyn) +{ + //TODO +} + +#define F8 *(uint8_t*)(addr++) +#define F32 *(uint32_t*)(addr+=4, addr-4) +#define F32S64 (uint64_t)(int64_t)*(int32_t*)(addr+=4, addr-4) +// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD +int getedparity(dynarec_rv64_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta) +{ + (void)dyn; (void)ninst; + + uint32_t tested = (1<<parity)-1; + if((nextop&0xC0)==0xC0) + return 0; // direct register, no parity... + if(!(nextop&0xC0)) { + if((nextop&7)==4) { + uint8_t sib = F8; + int sib_reg = (sib>>3)&7; + if((sib&0x7)==5) { + uint64_t tmp = F32S64; + if (sib_reg!=4) { + // if XXXXXX+reg<<N then check parity of XXXXX and N should be enough + return ((tmp&tested)==0 && (sib>>6)>=parity)?1:0; + } else { + // just a constant... + return (tmp&tested)?0:1; + } + } else { + if(sib_reg==4 && parity<3) + return 0; // simple [reg] + // don't try [reg1 + reg2<<N], unless reg1 is ESP + return ((sib&0x7)==4 && (sib>>6)>=parity)?1:0; + } + } else if((nextop&7)==5) { + uint64_t tmp = F32S64; + tmp+=addr+delta; + return (tmp&tested)?0:1; + } else { + return 0; + } + } else { + return 0; //Form [reg1 + reg2<<N + XXXXXX] + } +} + +// Do the GETED, but don't emit anything... 
+/*
+ * Walk over the addressing bytes that follow a ModRM byte (optional SIB byte
+ * and displacement) without emitting any code.
+ *   addr   : address of the first byte after the ModRM byte
+ *   nextop : the ModRM byte itself
+ * Returns the address of the first byte after the addressing bytes.
+ * dyn and ninst are not used by this function.
+ */
+uintptr_t fakeed(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop)
+{
+    (void)dyn; (void)ninst;
+
+    if((nextop&0xC0)==0xC0)
+        return addr;        // register operand: nothing follows ModRM
+    if(!(nextop&0xC0)) {
+        // mod==0: no displacement, except for the two encodings below
+        if((nextop&7)==4) {
+            uint8_t sib = F8;
+            if((sib&0x7)==5) {
+                addr+=4;    // SIB with base==5: a 32bits displacement follows
+            }
+        } else if((nextop&7)==5) {
+            addr+=4;        // rm==5: 32bits displacement only
+        }
+    } else {
+        if((nextop&7)==4) {
+            ++addr;         // skip the SIB byte
+        }
+        if(nextop&0x80) {
+            addr+=4;        // mod==2: 32bits displacement
+        } else {
+            ++addr;         // mod==1: 8bits displacement
+        }
+    }
+    return addr;
+}
+// the fetch helpers defined above getedparity are local to these two functions,
+// so drop all three of them (F32S64 used to stay defined)
+#undef F8
+#undef F32
+#undef F32S64
+
+/*
+ * Tell if the x64 code at addr is a call to a native function through a bridge.
+ *   addr        : x64 address to inspect
+ *   calladdress : if not NULL, receives addr+1 of the bridge when one is found
+ *   retn        : if not NULL, receives b->N when the bridge C3 field is 0xC2, else 0
+ * Returns 1 when a bridge is found, 0 otherwise (including when addr is NULL or
+ * getProtection() reports nothing for it). dyn is not used.
+ */
+int isNativeCall(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn)
+{
+    (void)dyn;
+
+#define PK(a)       *(uint8_t*)(addr+a)
+#define PK32(a)     *(int32_t*)(addr+a)
+
+    if(!addr || !getProtection(addr))
+        return 0;
+    if(PK(0)==0xff && PK(1)==0x25) {  // "absolute" jump, maybe the GOT (well, RIP relative in fact)
+        // the 32bits displacement is relative to the end of this 6 bytes opcode
+        uintptr_t a1 = addr + 6 + (PK32(2));    // need to add a check to see if the address is from the GOT !
+        addr = (uintptr_t)getAlternate(*(void**)a1);
+    }
+    // addr may have been replaced by the jump target: check it again
+    if(!addr || !getProtection(addr))
+        return 0;
+    onebridge_t *b = (onebridge_t*)(addr);
+    // a bridge is recognized by its CC 'S' 'C' signature, a wrapper, and a target that is not the PLT resolver
+    if(b->CC==0xCC && b->S=='S' && b->C=='C' && b->w!=(wrapper_t)0 && b->f!=(uintptr_t)PltResolver) {
+        // found !
+        if(retn) *retn = (b->C3==0xC2)?b->N:0;
+        if(calladdress) *calladdress = addr+1;
+        return 1;
+    }
+    return 0;
+#undef PK32
+#undef PK
+}
+
+// is inst clean for a son branch? 
+int isInstClean(dynarec_rv64_t* dyn, int ninst)
+{
+    // check flags cache: not clean if the entry flags state has dfnone or pending set
+    if(dyn->insts[ninst].f_entry.dfnone || dyn->insts[ninst].f_entry.pending)
+        return 0;
+    // not clean either if the x64 instruction has state_flags set
+    if(dyn->insts[ninst].x64.state_flags)
+        return 0;
+    return 1;
+}
+
+// return pred if it is one of the predecessors of instruction ninst, or -1 if not
+int isPred(dynarec_rv64_t* dyn, int ninst, int pred) {
+    for(int i=0; i<dyn->insts[ninst].pred_sz; ++i)
+        if(dyn->insts[ninst].pred[i]==pred)
+            return pred;
+    return -1;
+}
+// pick one predecessor of ninst: the previous instruction when it is a predecessor,
+// else the first one of the list. Return -1 for the first instruction or if there is none
+int getNominalPred(dynarec_rv64_t* dyn, int ninst) {
+    if((ninst<=0) || !dyn->insts[ninst].pred_sz)
+        return -1;
+    if(isPred(dyn, ninst, ninst-1)!=-1)
+        return ninst-1;
+    return dyn->insts[ninst].pred[0];
+}
+
+// get, from the 32bits word val, the byte selected by the 2 low bits of address
+uint8_t extract_byte(uint32_t val, void* address){
+    int idx = (((uintptr_t)address)&3)*8;
+    return (val>>idx)&0xff;
+}
+// return val with the byte selected by the 2 low bits of address replaced by b
+uint32_t insert_byte(uint32_t val, uint8_t b, void* address){
+    int idx = (((uintptr_t)address)&3)*8;
+    // the mask is built unsigned: 0xff<<24 does not fit in an int
+    val&=~(((uint32_t)0xff)<<idx);
+    val|=(((uint32_t)b)<<idx);
+    return val;
+}
+
+// will go badly if address is unaligned (the half word must fit in the 32bits word)
+uint16_t extract_half(uint32_t val, void* address){
+    int idx = (((uintptr_t)address)&3)*8;
+    return (val>>idx)&0xffff;
+}
+// return val with the half word selected by the 2 low bits of address replaced by h
+uint32_t insert_half(uint32_t val, uint16_t h, void* address){
+    int idx = (((uintptr_t)address)&3)*8;
+    // the mask is built unsigned: 0xffff<<16 does not fit in an int
+    val&=~(((uint32_t)0xffff)<<idx);
+    val|=(((uint32_t)h)<<idx);
+    return val;
+}
+
+// exchange the byte at addr with val, using a 32bits compare and swap on the
+// aligned word that contains it. Return the previous byte
+uint8_t rv64_lock_xchg_b(void* addr, uint8_t val)
+{
+    uint32_t ret;
+    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
+    do {
+        ret = *aligned;
+        // retry for as long as rv64_lock_cas_d gives a non-zero result
+    } while(rv64_lock_cas_d(aligned, ret, insert_byte(ret, val, addr)));
+    return extract_byte(ret, addr);
+}
+
+// compare and swap of the byte at addr: store val only if the byte is still ref.
+// Return the result of rv64_lock_cas_d on the aligned 32bits word
+int rv64_lock_cas_b(void* addr, uint8_t ref, uint8_t val)
+{
+    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
+    uint32_t tmp = *aligned;
+    // the expected word is the current one with ref in place of the byte,
+    // so the swap is refused when the byte in memory is not ref anymore
+    return rv64_lock_cas_d(aligned, insert_byte(tmp, ref, addr), insert_byte(tmp, val, addr));
+}
+
+// compare and swap of the half word at addr: store val only if the half word is still ref.
+// Return the result of rv64_lock_cas_d on the aligned 32bits word
+int rv64_lock_cas_h(void* addr, uint16_t ref, uint16_t val)
+{
+    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
+    uint32_t tmp = *aligned;
+    // same as rv64_lock_cas_b: ref has to be part of the expected word
+    return rv64_lock_cas_d(aligned, insert_half(tmp, ref, addr), insert_half(tmp, val, addr));
+}
\ No newline at end of file
diff --git 
a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h new file mode 100644 index 00000000..79ff47a3 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_functions.h @@ -0,0 +1,26 @@ +#ifndef __DYNAREC_RV64_FUNCTIONS_H__ +#define __DYNAREC_RV64_FUNCTIONS_H__ +#include <stdint.h> + +typedef struct x64emu_s x64emu_t; +typedef struct dynarec_rv64_s dynarec_rv64_t; + +// Reset scratch regs counter +void fpu_reset_scratch(dynarec_rv64_t* dyn); + +// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD +int getedparity(dynarec_rv64_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta); +// Do the GETED, but don't emit anything... +uintptr_t fakeed(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop); + +// Is what pointed at addr a native call? And if yes, to what function? +int isNativeCall(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn); + +// predecessor access +int isPred(dynarec_rv64_t* dyn, int ninst, int pred); +int getNominalPred(dynarec_rv64_t* dyn, int ninst); + +// is inst clean for a son branch? 
+int isInstClean(dynarec_rv64_t* dyn, int ninst); + +#endif //__DYNAREC_RV64_FUNCTIONS_H__ \ No newline at end of file diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c new file mode 100644 index 00000000..3dee0d98 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_helper.c @@ -0,0 +1,323 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <pthread.h> +#include <errno.h> +#include <assert.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" +#include "../dynablock_private.h" +#include "../tools/bridge_private.h" +#include "custommem.h" + +#include "rv64_printer.h" +#include "dynarec_rv64_private.h" +#include "dynarec_rv64_functions.h" +#include "dynarec_rv64_helper.h" + +/* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */ +uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int *l, int i12, int delta) +{ + MAYUSE(dyn); MAYUSE(ninst); MAYUSE(delta); + + int lock = l?((l==LOCK_LOCK)?1:2):0; + if(lock==2) + *l = 0; + uint8_t ret = x2; + *fixaddress = 0; + if(hint>0) ret = hint; + MAYUSE(scratch); + if(!(nextop&0xC0)) { + if((nextop&7)==4) { + uint8_t sib = F8; + int sib_reg = ((sib>>3)&7)+(rex.x<<3); + if((sib&0x7)==5) { + int64_t tmp = F32S; + if (sib_reg!=4) { + if(tmp && ((tmp<-2048) || (tmp>2047) || !i12)) { + MOV64x(scratch, tmp); + SLLI(ret, xRAX+sib_reg, (sib>>6)); + ADD(ret, ret, scratch); + } else { + SLLI(ret, xRAX+sib_reg, (sib>>6)); + *fixaddress = tmp; + } + } else { + switch(lock) { + case 1: addLockAddress(tmp); break; + 
case 2: if(isLockAddress(tmp)) *l=1; break; + } + MOV64x(ret, tmp); + } + } else { + if (sib_reg!=4) { + SLLI(scratch, xRAX+sib_reg, (sib>>6)); + ADD(ret, xRAX+(sib&0x7)+(rex.b<<3), scratch); + } else { + ret = xRAX+(sib&0x7)+(rex.b<<3); + } + } + } else if((nextop&7)==5) { + int64_t tmp = F32S64; + if(i12 && (tmp>=-2048) && (tmp<=2047)) { + GETIP(addr+delta); + ret = xRIP; + *fixaddress = tmp; + } else if((tmp>=-2048) && (tmp<=2047)) { + GETIP(addr+delta); + ADDI(ret, xRIP, tmp); + } else if(tmp+addr+delta<0x100000000LL) { + MOV64x(ret, tmp+addr+delta); + } else { + MOV64x(ret, tmp); + GETIP(addr+delta); + ADD(ret, ret, xRIP); + } + switch(lock) { + case 1: addLockAddress(addr+delta+tmp); break; + case 2: if(isLockAddress(addr+delta+tmp)) *l=1; break; + } + } else { + ret = xRAX+(nextop&7)+(rex.b<<3); + } + } else { + int64_t i64; + uint8_t sib = 0; + int sib_reg = 0; + if((nextop&7)==4) { + sib = F8; + sib_reg = ((sib>>3)&7)+(rex.x<<3); + } + if(nextop&0x80) + i64 = F32S; + else + i64 = F8S; + if(i64==0 || ((i64>=-2048) && (i64<=2047) && i12)) { + *fixaddress = i64; + if((nextop&7)==4) { + if (sib_reg!=4) { + SLLI(scratch, xRAX+sib_reg, (sib>>6)); + ADD(ret, xRAX+(sib&0x07)+(rex.b<<3), scratch); + } else { + ret = xRAX+(sib&0x07)+(rex.b<<3); + } + } else + ret = xRAX+(nextop&0x07)+(rex.b<<3); + } else { + if(i64>=-2048 && i64<=2047) { + if((nextop&7)==4) { + if (sib_reg!=4) { + SLLI(scratch, xRAX+sib_reg, (sib>>6)); + ADD(scratch, xRAX+(sib&0x07)+(rex.b<<3), scratch); + } else { + scratch = xRAX+(sib&0x07)+(rex.b<<3); + } + } else + scratch = xRAX+(nextop&0x07)+(rex.b<<3); + ADDI(ret, scratch, i64); + } else { + MOV64x(scratch, i64); + if((nextop&7)==4) { + if (sib_reg!=4) { + ADD(scratch, scratch, xRAX+(sib&0x07)+(rex.b<<3)); + SLLI(ret, xRAX+sib_reg, (sib>>6)); + ADD(ret, scratch, ret); + } else { + PASS3(int tmp = xRAX+(sib&0x07)+(rex.b<<3)); + ADD(ret, tmp, scratch); + } + } else { + PASS3(int tmp = xRAX+(nextop&0x07)+(rex.b<<3)); + ADD(ret, tmp, scratch); + 
} + } + } + } + *ed = ret; + return addr; +} + +void jump_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst) +{ + MAYUSE(dyn); MAYUSE(ip); MAYUSE(ninst); + MESSAGE(LOG_DUMP, "Jump to epilog\n"); + + if(reg) { + if(reg!=xRIP) { + MV(xRIP, reg); + } + } else { + GETIP_(ip); + } + TABLE64(x2, (uintptr_t)rv64_epilog); + SMEND(); + BR(x2); +} + +void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst) +{ + MAYUSE(dyn); MAYUSE(ninst); + MESSAGE(LOG_DUMP, "Jump to next\n"); + + if(reg) { + if(reg!=xRIP) { + MV(xRIP, reg); + } + uintptr_t tbl = getJumpTable64(); + MAYUSE(tbl); + TABLE64(x3, tbl); + SRLI(x2, xRIP, JMPTABL_START3); + SLLI(x2, x2, 3); + LD(x3, x3, x2); + LUI(x4, JMPTABLE_MASK2); // x4 = mask + SRLI(x2, xRIP, JMPTABL_START2); + AND(x2, x2, x4); + SLLI(x2, x2, 3); + LD(x3, x3, x2); + if(JMPTABLE_MASK2!=JMPTABLE_MASK1) { + LUI(x4, JMPTABLE_MASK1); // x4 = mask + } + SRLI(x2, xRIP, JMPTABL_START1); + AND(x2, x2, x4); + SLLI(x2, x2, 3); + LD(x3, x3, x2); + if(JMPTABLE_MASK1!=JMPTABLE_MASK0) { + LUI(x4, JMPTABLE_MASK0); // x4 = mask + } + AND(x2, x2, x4); + SLLI(x2, x2, 3); + LD(x2, x3, x2); + } else { + uintptr_t p = getJumpTableAddress64(ip); + MAYUSE(p); + TABLE64(x3, p); + GETIP_(ip); + LD(x2, x3, 0); + } + if(reg!=A1) { + MV(A1, xRIP); + } + CLEARIP(); + #ifdef HAVE_TRACE + //MOVx(x3, 15); no access to PC reg + #endif + SMEND(); + JALR(x2); // save LR... 
+} + +void fpu_reset(dynarec_rv64_t* dyn) +{ + //TODO +} + +void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n) +{ + //TODO +} + +void fpu_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, int s3) +{ + //TODO +} + +// propagate ST stack state, especial stack pop that are defered +void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst) +{ + //TODO +} + +void mmx_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1) +{ + // TODO +} + +void x87_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, int s3) +{ + //TODO +} + +#ifdef HAVE_TRACE +void fpu_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3) +{ + //TODO +} +#endif +void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) +{ + //TODO +} +void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07) +{ + //TODO +} + +void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val) +{ + int32_t up=(val>>12); + int32_t r = val-(up<<12); + // check if there is the dreaded sign bit on imm12 + if(r&0b100000000000 && r!=0xffffffff) { + ++up; + r = val-(up<<12); + } + LUI(reg, up); + if(r) { + ADDI(reg, reg, r); + } +} +void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val) +{ + if(((val<<(64-12))>>(64-12))==val) { + // simple 12bit value + MOV_U12(reg, (val&0b111111111111)); + return; + } + if(((val<<32)>>32)==val) { + // 32bits value + rv64_move32(dyn, ninst, reg, val); + return; + } + if((val&0xffffffffLL)==val && (val&0x80000000)) { + // 32bits value, but with a sign bit + rv64_move32(dyn, ninst, reg, val); + ZEROUP(reg); + return; + } + //TODO: optimize that later + // Start with the upper 32bits + rv64_move32(dyn, ninst, reg, val>>32); + // now the lower part + uint32_t r = val&0xffffffff; + int s = 11; + if((r>>21)&0b11111111111) { + SLLI(reg, reg, s); + ORI(reg, reg, (r>>21)&0b11111111111); + s = 0; + } + s+=11; + if((r>>10)&0b11111111111) { + SLLI(reg, reg, s); + ORI(reg, reg, 
(r>>10)&0b11111111111); + s = 0; + } + s+=10; + if(r&0b1111111111) { + SLLI(reg, reg, s); + ORI(reg, reg, r&0b1111111111); + s=0; + } + if(s) { + SLLI(reg, reg, s); + } +} \ No newline at end of file diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h new file mode 100644 index 00000000..2add1717 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_helper.h @@ -0,0 +1,490 @@ +#ifndef __DYNAREC_RV64_HELPER_H__ +#define __DYNAREC_RV64_HELPER_H__ + +// undef to get Close to SSE Float->int conversions +//#define PRECISE_CVT + +#if STEP == 0 +#include "dynarec_rv64_pass0.h" +#elif STEP == 1 +#include "dynarec_rv64_pass1.h" +#elif STEP == 2 +#include "dynarec_rv64_pass2.h" +#elif STEP == 3 +#include "dynarec_rv64_pass3.h" +#endif + +#include "debug.h" +#include "rv64_emitter.h" +#include "../emu/x64primop.h" + +#define F8 *(uint8_t*)(addr++) +#define F8S *(int8_t*)(addr++) +#define F16 *(uint16_t*)(addr+=2, addr-2) +#define F16S *(int16_t*)(addr+=2, addr-2) +#define F32 *(uint32_t*)(addr+=4, addr-4) +#define F32S *(int32_t*)(addr+=4, addr-4) +#define F32S64 (uint64_t)(int64_t)F32S +#define F64 *(uint64_t*)(addr+=8, addr-8) +#define PK(a) *(uint8_t*)(addr+a) +#define PK16(a) *(uint16_t*)(addr+a) +#define PK32(a) *(uint32_t*)(addr+a) +#define PK64(a) *(uint64_t*)(addr+a) +#define PKip(a) *(uint8_t*)(ip+a) + + +// Strong mem emulation helpers +// Sequence of Read will trigger a DMB on "first" read if strongmem is 2 +// Squence of Write will trigger a DMB on "last" write if strongmem is 1 +// Opcode will read +#define SMREAD() if(!dyn->smread && box64_dynarec_strongmem>1) {SMDMB();} +// Opcode will read with option forced lock +#define SMREADLOCK(lock) if(lock || (!dyn->smread && box64_dynarec_strongmem>1)) {SMDMB();} +// Opcode migh read (depend on nextop) +#define SMMIGHTREAD() if(!MODREG) {SMREAD();} +// Opcode has wrote +#define SMWRITE() dyn->smwrite=1 +// Opcode has wrote (strongmem>1 only) +#define SMWRITE2() 
if(box64_dynarec_strongmem>1) dyn->smwrite=1 +// Opcode has wrote with option forced lock +#define SMWRITELOCK(lock) if(lock) {SMDMB();} else dyn->smwrite=1 +// Opcode migh have wrote (depend on nextop) +#define SMMIGHTWRITE() if(!MODREG) {SMWRITE();} +// Start of sequence +#define SMSTART() SMEND() +// End of sequence +#define SMEND() if(dyn->smwrite && box64_dynarec_strongmem) {FENCE();} dyn->smwrite=0; dyn->smread=0; +// Force a Data memory barrier (for LOCK: prefix) +#define SMDMB() FENCE(); dyn->smwrite=0; dyn->smread=1 + +//LOCK_* define +#define LOCK_LOCK (int*)1 + +// GETGD get x64 register in gd +#define GETGD gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3) +//GETED can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI +#define GETED(D) if(MODREG) { \ + ed = xRAX+(nextop&7)+(rex.b<<3); \ + wback = 0; \ + } else { \ + SMREAD() \ + addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \ + LD_I12(x1, wback, fixedaddress); \ + ed = x1; \ + } + +#ifndef MAYSETFLAGS +#define MAYSETFLAGS() +#endif + +#ifndef READFLAGS +#define READFLAGS(A) \ + +#endif + +#ifndef SETFLAGS +#define SETFLAGS(A, B) \ + +#endif +#ifndef JUMP +#define JUMP(A, C) +#endif +#ifndef BARRIER +#define BARRIER(A) +#endif +#ifndef BARRIER_NEXT +#define BARRIER_NEXT(A) +#endif + +#ifndef DEFAULT +#define DEFAULT *ok = -1; BARRIER(2) +#endif + +#ifndef TABLE64 +#define TABLE64(A, V) +#endif + +#if STEP < 2 +#define GETIP(A) +#define GETIP_(A) +#else +// put value in the Table64 even if not using it for now to avoid difference between Step2 and Step3. Needs to be optimized later... 
+#define GETIP(A) \
+    if(dyn->last_ip && ((A)-dyn->last_ip)<0x800) { /* delta must fit ADDI's signed 12-bit immediate */ \
+        uint64_t _delta_ip = (A)-dyn->last_ip; \
+        dyn->last_ip += _delta_ip; \
+        if(_delta_ip) { \
+            ADDI(xRIP, xRIP, _delta_ip); \
+        } \
+    } else { \
+        dyn->last_ip = (A); \
+        if(dyn->last_ip<0xffffffff) { \
+            MOV64x(xRIP, dyn->last_ip); \
+        } else \
+            TABLE64(xRIP, dyn->last_ip); \
+    }
+#define GETIP_(A) \
+    if(dyn->last_ip && ((A)-dyn->last_ip)<0x800) { /* same bound as GETIP; dyn->last_ip is left untouched here */ \
+        uint64_t _delta_ip = (A)-dyn->last_ip; \
+        if(_delta_ip) {ADDI(xRIP, xRIP, _delta_ip);} \
+    } else { \
+        if((A)<0xffffffff) { \
+            MOV64x(xRIP, (A)); \
+        } else \
+            TABLE64(xRIP, (A)); \
+    }
+#endif
+#define CLEARIP() dyn->last_ip=0
+
+
+#define MODREG ((nextop&0xC0)==0xC0)
+
+void rv64_epilog();
+void* rv64_next(x64emu_t* emu, uintptr_t addr);
+
+#ifndef STEPNAME
+#define STEPNAME3(N,M) N##M
+#define STEPNAME2(N,M) STEPNAME3(N,M)
+#define STEPNAME(N) STEPNAME2(N, STEP)
+#endif
+
+#define native_pass STEPNAME(native_pass)
+
+#define dynarec64_00 STEPNAME(dynarec64_00)
+#define dynarec64_0F STEPNAME(dynarec64_0F)
+#define dynarec64_64 STEPNAME(dynarec64_64)
+#define dynarec64_65 STEPNAME(dynarec64_65)
+#define dynarec64_66 STEPNAME(dynarec64_66)
+#define dynarec64_67 STEPNAME(dynarec64_67)
+#define dynarec64_D8 STEPNAME(dynarec64_D8)
+#define dynarec64_D9 STEPNAME(dynarec64_D9)
+#define dynarec64_DA STEPNAME(dynarec64_DA)
+#define dynarec64_DB STEPNAME(dynarec64_DB)
+#define dynarec64_DC STEPNAME(dynarec64_DC)
+#define dynarec64_DD STEPNAME(dynarec64_DD)
+#define dynarec64_DE STEPNAME(dynarec64_DE)
+#define dynarec64_DF STEPNAME(dynarec64_DF)
+#define dynarec64_F0 STEPNAME(dynarec64_F0)
+#define dynarec64_660F STEPNAME(dynarec64_660F)
+#define dynarec64_6664 STEPNAME(dynarec64_6664)
+#define dynarec64_66F0 STEPNAME(dynarec64_66F0)
+#define dynarec64_F20F STEPNAME(dynarec64_F20F)
+#define dynarec64_F30F STEPNAME(dynarec64_F30F)
+
+#define geted STEPNAME(geted)
+#define geted32 STEPNAME(geted32)
+#define geted16 STEPNAME(geted16)
+#define
jump_to_epilog STEPNAME(jump_to_epilog) +#define jump_to_next STEPNAME(jump_to_next) +#define ret_to_epilog STEPNAME(ret_to_epilog) +#define retn_to_epilog STEPNAME(retn_to_epilog) +#define iret_to_epilog STEPNAME(iret_to_epilog) +#define call_c STEPNAME(call_c) +#define call_n STEPNAME(call_n) +#define grab_segdata STEPNAME(grab_segdata) +#define emit_cmp8 STEPNAME(emit_cmp8) +#define emit_cmp16 STEPNAME(emit_cmp16) +#define emit_cmp32 STEPNAME(emit_cmp32) +#define emit_cmp8_0 STEPNAME(emit_cmp8_0) +#define emit_cmp16_0 STEPNAME(emit_cmp16_0) +#define emit_cmp32_0 STEPNAME(emit_cmp32_0) +#define emit_test8 STEPNAME(emit_test8) +#define emit_test16 STEPNAME(emit_test16) +#define emit_test32 STEPNAME(emit_test32) +#define emit_add32 STEPNAME(emit_add32) +#define emit_add32c STEPNAME(emit_add32c) +#define emit_add8 STEPNAME(emit_add8) +#define emit_add8c STEPNAME(emit_add8c) +#define emit_sub32 STEPNAME(emit_sub32) +#define emit_sub32c STEPNAME(emit_sub32c) +#define emit_sub8 STEPNAME(emit_sub8) +#define emit_sub8c STEPNAME(emit_sub8c) +#define emit_or32 STEPNAME(emit_or32) +#define emit_or32c STEPNAME(emit_or32c) +#define emit_xor32 STEPNAME(emit_xor32) +#define emit_xor32c STEPNAME(emit_xor32c) +#define emit_and32 STEPNAME(emit_and32) +#define emit_and32c STEPNAME(emit_and32c) +#define emit_or8 STEPNAME(emit_or8) +#define emit_or8c STEPNAME(emit_or8c) +#define emit_xor8 STEPNAME(emit_xor8) +#define emit_xor8c STEPNAME(emit_xor8c) +#define emit_and8 STEPNAME(emit_and8) +#define emit_and8c STEPNAME(emit_and8c) +#define emit_add16 STEPNAME(emit_add16) +#define emit_add16c STEPNAME(emit_add16c) +#define emit_sub16 STEPNAME(emit_sub16) +#define emit_sub16c STEPNAME(emit_sub16c) +#define emit_or16 STEPNAME(emit_or16) +#define emit_or16c STEPNAME(emit_or16c) +#define emit_xor16 STEPNAME(emit_xor16) +#define emit_xor16c STEPNAME(emit_xor16c) +#define emit_and16 STEPNAME(emit_and16) +#define emit_and16c STEPNAME(emit_and16c) +#define emit_inc32 STEPNAME(emit_inc32) +#define 
emit_inc16 STEPNAME(emit_inc16)
+#define emit_inc8 STEPNAME(emit_inc8)
+#define emit_dec32 STEPNAME(emit_dec32)
+#define emit_dec16 STEPNAME(emit_dec16)
+#define emit_dec8 STEPNAME(emit_dec8)
+#define emit_adc32 STEPNAME(emit_adc32)
+#define emit_adc32c STEPNAME(emit_adc32c)
+#define emit_adc8 STEPNAME(emit_adc8)
+#define emit_adc8c STEPNAME(emit_adc8c)
+#define emit_adc16 STEPNAME(emit_adc16)
+#define emit_adc16c STEPNAME(emit_adc16c)
+#define emit_sbb32 STEPNAME(emit_sbb32)
+#define emit_sbb32c STEPNAME(emit_sbb32c)
+#define emit_sbb8 STEPNAME(emit_sbb8)
+#define emit_sbb8c STEPNAME(emit_sbb8c)
+#define emit_sbb16 STEPNAME(emit_sbb16)
+#define emit_sbb16c STEPNAME(emit_sbb16c)
+#define emit_neg32 STEPNAME(emit_neg32)
+#define emit_neg16 STEPNAME(emit_neg16)
+#define emit_neg8 STEPNAME(emit_neg8)
+#define emit_shl32 STEPNAME(emit_shl32)
+#define emit_shl32c STEPNAME(emit_shl32c)
+#define emit_shr32 STEPNAME(emit_shr32)
+#define emit_shr32c STEPNAME(emit_shr32c)
+#define emit_sar32c STEPNAME(emit_sar32c)
+#define emit_rol32c STEPNAME(emit_rol32c)
+#define emit_ror32c STEPNAME(emit_ror32c)
+#define emit_shrd32c STEPNAME(emit_shrd32c)
+#define emit_shld32c STEPNAME(emit_shld32c)
+
+#define emit_pf STEPNAME(emit_pf)
+
+#define x87_do_push STEPNAME(x87_do_push)
+#define x87_do_push_empty STEPNAME(x87_do_push_empty)
+#define x87_do_pop STEPNAME(x87_do_pop)
+#define x87_get_current_cache STEPNAME(x87_get_current_cache)
+#define x87_get_cache STEPNAME(x87_get_cache)
+#define x87_get_neoncache STEPNAME(x87_get_neoncache)
+#define x87_get_st STEPNAME(x87_get_st)
+#define x87_get_st_empty STEPNAME(x87_get_st_empty) // must get its own per-step name, distinct from x87_get_st
+#define x87_refresh STEPNAME(x87_refresh)
+#define x87_forget STEPNAME(x87_forget)
+#define x87_reget_st STEPNAME(x87_reget_st)
+#define x87_stackcount STEPNAME(x87_stackcount)
+#define x87_swapreg STEPNAME(x87_swapreg)
+#define x87_setround STEPNAME(x87_setround)
+#define x87_restoreround STEPNAME(x87_restoreround)
+#define sse_setround STEPNAME(sse_setround)
+#define
mmx_get_reg STEPNAME(mmx_get_reg) +#define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty) +#define sse_get_reg STEPNAME(sse_get_reg) +#define sse_get_reg_empty STEPNAME(sse_get_reg_empty) +#define sse_forget_reg STEPNAME(sse_forget_reg) +#define sse_purge07cache STEPNAME(sse_purge07cache) + +#define fpu_pushcache STEPNAME(fpu_pushcache) +#define fpu_popcache STEPNAME(fpu_popcache) +#define fpu_reset STEPNAME(fpu_reset) +#define fpu_reset_cache STEPNAME(fpu_reset_cache) +#define fpu_propagate_stack STEPNAME(fpu_propagate_stack) +#define fpu_purgecache STEPNAME(fpu_purgecache) +#define mmx_purgecache STEPNAME(mmx_purgecache) +#define x87_purgecache STEPNAME(x87_purgecache) +#ifdef HAVE_TRACE +#define fpu_reflectcache STEPNAME(fpu_reflectcache) +#endif + +#define CacheTransform STEPNAME(CacheTransform) +#define rv64_move64 STEPNAME(rv64_move64) +#define rv64_move32 STEPNAME(rv64_move32) + +/* setup r2 to address pointed by */ +uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta); + +/* setup r2 to address pointed by */ +//uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, rex_t rex, int* l, int s, int delta); + +/* setup r2 to address pointed by */ +//uintptr_t geted16(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, int s); + + +// generic x64 helper +void jump_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst); +void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst); +//void ret_to_epilog(dynarec_rv64_t* dyn, int ninst); +//void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, int n); +//void iret_to_epilog(dynarec_rv64_t* dyn, int ninst, int is64bits); +//void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int 
reg, int ret, int saveflags, int save_reg); +//void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w); +//void grab_segdata(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, int reg, int segment); +//void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +//void emit_cmp16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +//void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5); +//void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +//void emit_cmp16_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +//void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); +//void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +//void emit_test16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5); +//void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5); +//void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5); +//void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_sub8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); +//void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); +//void emit_xor32(dynarec_rv64_t* dyn, 
int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); +//void emit_and32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_and32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4); +//void emit_or8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_or8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_xor8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_xor8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_and8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_and8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_add16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_sub16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_sub16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_or16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_or16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_xor16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_and16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); +//void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +//void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +//void emit_dec32(dynarec_rv64_t* dyn, int 
ninst, rex_t rex, int s1, int s3, int s4); +//void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +//void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +//void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_adc32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); +//void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_adc16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_sbb32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5); +//void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4); +//void emit_sbb16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4); +//void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4); +//void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +//void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); +//void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); +//void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4); +//void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); +//void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); +//void 
emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); +//void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4); +//void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); +//void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4); + +//void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4); + +// x87 helper +// cache of the local stack counter, to avoid upadte at every call +//void x87_stackcount(dynarec_rv64_t* dyn, int ninst, int scratch); +// fpu push. Return the Dd value to be used +//int x87_do_push(dynarec_rv64_t* dyn, int ninst, int s1, int t); +// fpu push. Do not allocate a cache register. Needs a scratch register to do x87stack synch (or 0 to not do it) +//void x87_do_push_empty(dynarec_rv64_t* dyn, int ninst, int s1); +// fpu pop. All previous returned Dd should be considered invalid +//void x87_do_pop(dynarec_rv64_t* dyn, int ninst, int s1); +// get cache index for a x87 reg, return -1 if cache doesn't exist +//int x87_get_current_cache(dynarec_rv64_t* dyn, int ninst, int st, int t); +// get cache index for a x87 reg, create the entry if needed +//int x87_get_cache(dynarec_rv64_t* dyn, int ninst, int populate, int s1, int s2, int a, int t); +// get neoncache index for a x87 reg +//int x87_get_neoncache(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a); +// get vfpu register for a x87 reg, create the entry if needed +//int x87_get_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int t); +// get vfpu register for a x87 reg, create the entry if needed. 
Do not fetch the Stx if not already in cache +//int x87_get_st_empty(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int t); +// refresh a value from the cache ->emu (nothing done if value is not cached) +//void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st); +// refresh a value from the cache ->emu and then forget the cache (nothing done if value is not cached) +//void x87_forget(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st); +// refresh the cache value from emu +//void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st); +// swap 2 x87 regs +//void x87_swapreg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int b); +// Set rounding according to cw flags, return reg to restore flags +//int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3); +// Restore round flag +//void x87_restoreround(dynarec_rv64_t* dyn, int ninst, int s1); +// Set rounding according to mxcsr flags, return reg to restore flags +//int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3); + +//void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3); + +void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val); +void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val); + +#if STEP < 2 +#define CHECK_CACHE() 0 +#else +#define CHECK_CACHE() (cacheupd = CacheNeedsTransform(dyn, ninst)) +#endif + +// common coproc helpers +// reset the cache +void fpu_reset(dynarec_rv64_t* dyn); +// reset the cache with n +void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n); +// propagate stack state +void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst); +// purge the FPU cache (needs 3 scratch registers) +void fpu_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, int s3); +// purge MMX cache +void mmx_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1); +// purge x87 cache +void x87_purgecache(dynarec_rv64_t* dyn, 
int ninst, int next, int s1, int s2, int s3); +#ifdef HAVE_TRACE +void fpu_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3); +#endif +void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07); +void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07); + +uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog); +//uintptr_t dynarec64_65(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep,int* ok, int* need_epilog); +//uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog); +//uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, 
uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_6664(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int seg, int* ok, int* need_epilog);
+//uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
+//uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
+
+#if STEP < 2
+#define PASS2(A)
+#else
+#define PASS2(A) A
+#endif
+
+#if STEP < 3
+#define PASS3(A)
+#else
+#define PASS3(A) A
+#endif
+
+#if STEP < 3
+#define MAYUSE(A) (void)A
+#else
+#define MAYUSE(A)
+#endif
+
+#endif //__DYNAREC_RV64_HELPER_H__
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_jmpnext.c b/src/dynarec/rv64/dynarec_rv64_jmpnext.c
new file mode 100644
index 00000000..5f58183f
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_jmpnext.c
@@ -0,0 +1,13 @@
+#include <stdint.h>
+
+#include "rv64_emitter.h"
+
+#define EMIT(A) *block = (A); ++block
+void CreateJmpNext(void* addr, void* next) // emits at addr: PC-relative load of the pointer stored at next, then jump to it
+{
+    uint32_t* block = (uint32_t*)addr;
+    uintptr_t diff = (intptr_t)next - (intptr_t)addr;
+    AUIPC(x2, (diff+0x800)>>12); // +0x800 rounds the upper part, as the low 12 bits are sign-extended by LD
+    LD(x2, x2, diff&0b111111111111);
+    BR(x2);
+}
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h
new file mode 100644
index 00000000..fd0f617f
--- /dev/null
+++
b/src/dynarec/rv64/dynarec_rv64_pass0.h @@ -0,0 +1,50 @@ + +#define INIT uintptr_t sav_addr=addr +#define FINI \ + dyn->isize = addr-sav_addr; \ + dyn->insts[ninst].x64.addr = addr; \ + if(ninst) dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr + +#define MESSAGE(A, ...) +#define MAYSETFLAGS() dyn->insts[ninst].x64.may_set = 1 +#define READFLAGS(A) \ + dyn->insts[ninst].x64.use_flags = A; dyn->f.dfnone = 1;\ + dyn->f.pending=SF_SET +#define SETFLAGS(A,B) \ + dyn->insts[ninst].x64.set_flags = A; \ + dyn->insts[ninst].x64.state_flags = B; \ + dyn->f.pending=(B)&SF_SET_PENDING; \ + dyn->f.dfnone=((B)&SF_SET)?1:0; +#define EMIT(A) +#define JUMP(A, C) add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C +#define BARRIER(A) if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1 +#define BARRIER_NEXT(A) dyn->insts[ninst+1].x64.barrier = A +#define NEW_INST \ + ++dyn->size; \ + if(dyn->size+3>=dyn->cap) { \ + dyn->insts = (instruction_native_t*)customRealloc(dyn->insts, sizeof(instruction_native_t)*dyn->cap*2);\ + memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_native_t)*dyn->cap); \ + dyn->cap *= 2; \ + } \ + dyn->insts[ninst].x64.addr = ip; \ + dyn->insts[ninst].f_entry = dyn->f; \ + if(ninst) {dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;} + +#define INST_EPILOG \ + dyn->insts[ninst].f_exit = dyn->f; \ + dyn->insts[ninst].x64.has_next = (ok>0)?1:0; +#define INST_NAME(name) +#define DEFAULT \ + --dyn->size; \ + *ok = -1; \ + if(box64_dynarec_log>=LOG_INFO) {\ + dynarec_log(LOG_NONE, "%p: Dynarec stopped because of Opcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \ + (void*)ip, PKip(0), \ + PKip(1), PKip(2), PKip(3), \ + PKip(4), PKip(5), PKip(6), \ + PKip(7), PKip(8), PKip(9), \ + PKip(10),PKip(11),PKip(12), \ + 
PKip(13),PKip(14)); \ + printFunctionAddr(ip, " => "); \ + dynarec_log(LOG_NONE, "\n"); \ + } diff --git a/src/dynarec/rv64/dynarec_rv64_pass1.h b/src/dynarec/rv64/dynarec_rv64_pass1.h new file mode 100644 index 00000000..9e80d7b0 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_pass1.h @@ -0,0 +1,11 @@ +#define INIT +#define FINI +#define MESSAGE(A, ...) +#define EMIT(A) +#define NEW_INST \ + dyn->insts[ninst].f_entry = dyn->f; \ + +#define INST_EPILOG \ + dyn->insts[ninst].f_exit = dyn->f + +#define INST_NAME(name) diff --git a/src/dynarec/rv64/dynarec_rv64_pass2.h b/src/dynarec/rv64/dynarec_rv64_pass2.h new file mode 100644 index 00000000..955011d4 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_pass2.h @@ -0,0 +1,19 @@ +#define INIT dyn->native_size = 0 +#define FINI \ + if(ninst) { \ + dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size); \ + dyn->insts_size += 1+((dyn->insts[ninst].x64.size>dyn->insts[ninst].size)?dyn->insts[ninst].x64.size:dyn->insts[ninst].size)/15; \ + } + +#define MESSAGE(A, ...) 
+#define EMIT(A) dyn->insts[ninst].size+=4; dyn->native_size+=4 +#define NEW_INST \ + if(ninst) { \ + dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size); \ + if(isInstClean(dyn, ninst) && dyn->last_ip!=ip) \ + dyn->last_ip = 0; \ + dyn->insts_size += 1+((dyn->insts[ninst-1].x64.size>dyn->insts[ninst-1].size)?dyn->insts[ninst-1].x64.size:dyn->insts[ninst-1].size)/15; \ + } +#define INST_EPILOG dyn->insts[ninst].epilog = dyn->native_size; +#define INST_NAME(name) +#define TABLE64(A, V) {Table64(dyn, (V)); EMIT(0); EMIT(0);} diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h new file mode 100644 index 00000000..b9356530 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_pass3.h @@ -0,0 +1,53 @@ +#define INIT +#define FINI \ + if(ninst) \ + addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst].x64.size, dyn->insts[ninst].size/4); \ + addInst(dyn->instsize, &dyn->insts_size, 0, 0); +#define EMIT(A) \ + if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), rv64_print(A, (uintptr_t)dyn->block));} \ + *(uint32_t*)(dyn->block) = (uint32_t)(A); \ + dyn->block += 4; dyn->native_size += 4; \ + dyn->insts[ninst].size2 += 4 + +#define MESSAGE(A, ...) 
if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
+#define NEW_INST \
+    if(ninst && isInstClean(dyn, ninst)) { \
+        if(dyn->last_ip!=ip) dyn->last_ip = 0; \
+    } \
+    if(ninst) \
+        addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst-1].x64.size, dyn->insts[ninst-1].size/4);
+#define INST_EPILOG
+#define INST_NAME(name) \
+    if(box64_dynarec_dump) {\
+        printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name); \
+        dynarec_log(LOG_NONE, "%s%p: %d emited opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d", \
+            (box64_dynarec_dump>1)?"\e[32m":"", \
+            (void*)(dyn->native_start+dyn->insts[ninst].address), \
+            dyn->insts[ninst].size/4, \
+            ninst, \
+            dyn->insts[ninst].x64.barrier, \
+            dyn->insts[ninst].x64.state_flags, \
+            dyn->f.pending, \
+            dyn->f.dfnone, \
+            dyn->insts[ninst].x64.may_set?"may":"set", \
+            dyn->insts[ninst].x64.set_flags, \
+            dyn->insts[ninst].x64.gen_flags, \
+            dyn->insts[ninst].x64.use_flags, \
+            dyn->insts[ninst].x64.need_before, \
+            dyn->insts[ninst].x64.need_after, \
+            dyn->smread, dyn->smwrite); \
+        if(dyn->insts[ninst].pred_sz) { \
+            dynarec_log(LOG_NONE, ", pred="); \
+            for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)\
+                dynarec_log(LOG_NONE, "%s%d", ii?"/":"", dyn->insts[ninst].pred[ii]);\
+        } \
+        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts>=0)\
+            dynarec_log(LOG_NONE, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts);\
+        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts==-1)\
+            dynarec_log(LOG_NONE, ", jmp=out"); \
+        if(dyn->last_ip) \
+            dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip);\
+        dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":""); \
+    }
+
+#define TABLE64(A, V) {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); AUIPC(A, ((val64offset+0x800)>>12)); LD(A, A, (val64offset&0b111111111111));} // +0x800: upper part is rounded because the low 12 bits are sign-extended by LD
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
new file mode 100644
index 00000000..7b8fdec2 --- /dev/null +++ b/src/dynarec/rv64/dynarec_rv64_private.h @@ -0,0 +1,80 @@ +#ifndef __DYNAREC_RV64_PRIVATE_H_ +#define __DYNAREC_RV64_PRIVATE_H_ +#include <stdint.h> + +#include "../dynarec_private.h" + +typedef struct x64emu_s x64emu_t; +typedef struct dynablock_s dynablock_t; +typedef struct instsize_s instsize_t; + +#define BARRIER_MAYBE 8 + +typedef struct flagcache_s { + int pending; // is there a pending flags here, or to check? + int dfnone; // if defered flags is already set to df_none +} flagcache_t; + +typedef struct instruction_rv64_s { + instruction_x64_t x64; + uintptr_t address; // (start) address of the arm emited instruction + uintptr_t epilog; // epilog of current instruction (can be start of next, or barrier stuff) + int size; // size of the arm emited instruction + int size2; // size of the arm emited instrucion after pass2 + int pred_sz; // size of predecessor list + int *pred; // predecessor array + uintptr_t mark, mark2, mark3; + uintptr_t markf; + uintptr_t markseg; + uintptr_t marklock; + int pass2choice;// value for choices that are fixed on pass2 for pass3 + uintptr_t natcall; + int retn; + int barrier_maybe; + flagcache_t f_exit; // flags status at end of intruction + flagcache_t f_entry; // flags status before the instruction begin +} instruction_rv64_t; + +typedef struct dynarec_rv64_s { + instruction_rv64_t* insts; + int32_t size; + int32_t cap; + uintptr_t start; // start of the block + uint32_t isize; // size in byte of x64 instructions included + void* block; // memory pointer where next instruction is emited + uintptr_t native_start; // start of the arm code + size_t native_size; // size of emitted arm code + uintptr_t last_ip; // last set IP in RIP (or NULL if unclean state) TODO: move to a cache something + uint64_t* table64; // table of 64bits value + int table64size;// size of table (will be appended at end of executable code) + int table64cap; + uintptr_t tablestart; + flagcache_t f; + uintptr_t* 
next; // variable array of "next" jump address + int next_sz; + int next_cap; + int* predecessor;// single array of all predecessor + dynablock_t* dynablock; + instsize_t* instsize; + size_t insts_size; // size of the instruction size array (calculated) + uint8_t smread; // for strongmem model emulation + uint8_t smwrite; // for strongmem model emulation + uintptr_t forward; // address of the last end of code while testing forward + uintptr_t forward_to; // address of the next jump to (to check if everything is ok) + int32_t forward_size; // size at the forward point + int forward_ninst; // ninst at the forward point +} dynarec_rv64_t; + +void add_next(dynarec_rv64_t *dyn, uintptr_t addr); +uintptr_t get_closest_next(dynarec_rv64_t *dyn, uintptr_t addr); +int is_nops(dynarec_rv64_t *dyn, uintptr_t addr, int n); +int is_instructions(dynarec_rv64_t *dyn, uintptr_t addr, int n); + +int Table64(dynarec_rv64_t *dyn, uint64_t val); // add a value to etable64 (if needed) and gives back the imm19 to use in LDR_literal + +void CreateJmpNext(void* addr, void* next); + +//TODO: GO_TRACE() ! +#define GO_TRACE() + +#endif //__DYNAREC_RV64_PRIVATE_H_ \ No newline at end of file diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h new file mode 100644 index 00000000..6810a484 --- /dev/null +++ b/src/dynarec/rv64/rv64_emitter.h @@ -0,0 +1,223 @@ +#ifndef __RV64_EMITTER_H__ +#define __RV64_EMITTER_H__ +/* + RV64 Emitter + +*/ + +// RV64 ABI +/* +reg name description saver +------------------------------------------------------ +x0 zero Hard-wired zero — +x1 ra Return address Caller +x2 sp Stack pointer Callee +x3 gp Global pointer — +x4 tp Thread pointer — +x5–7 t0–2 Temporaries Caller +x8 s0/fp Saved register/frame pointer Callee +x9 s1 Saved register Callee +x10–11 a0–1 Function arguments/return val. 
// x86 Register mapping
// Each guest x86-64 register is pinned to one RV64 integer register:
// RAX..R15 live in x16..x31, the flags in x5 and RIP in x6.
#define xRAX    16
#define xRCX    17
#define xRDX    18
#define xRBX    19
#define xRSP    20
#define xRBP    21
#define xRSI    22
#define xRDI    23
#define xR8     24
#define xR9     25
#define xR10    26
#define xR11    27
#define xR12    28
#define xR13    29
#define xR14    30
#define xR15    31
#define xFlags  5
#define xRIP    6

// 32bits version
#define wEAX    xRAX
#define wECX    xRCX
#define wEDX    xRDX
#define wEBX    xRBX
#define wESP    xRSP
#define wEBP    xRBP
#define wESI    xRSI
#define wEDI    xRDI
#define wR8     xR8
#define wR9     xR9
#define wR10    xR10
#define wR11    xR11
#define wR12    xR12
#define wR13    xR13
#define wR14    xR14
#define wR15    xR15
#define wFlags  xFlags
// scratch registers
#define x1      11
#define x2      12
#define x3      13
#define x4      14
#define x5      15
// used to clear the upper 32bits
#define xMASK   7
// 32bits version of scratch
#define w1      x1
#define w2      x2
#define w3      x3
#define w4      x4
#define w5      x5
// NOTE(review): no x6 scratch register is defined in this section, so w6 only
// expands to something usable if x6 is defined elsewhere -- confirm.
#define w6      x6
// emu is r10
#define xEmu    10
// RV64 RA
#define xRA     1
#define xSP     2
// RV64 args
#define A0      10
#define A1      11
// xZR reg is 0
#define xZR     0
#define wZR     xZR

// MOVE64x is quite complex, so use a function for this
#define MOV64x(A, B)    rv64_move64(dyn, ninst, A, B)

// ZERO the upper part
#define ZEROUP(r)       AND(r, r, xMASK)

// Instruction format encoders: each one packs its fields into a 32-bit opcode word.
//   R: funct7[31:25] rs2[24:20] rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0]
//   I: imm12[31:20]             rs1[19:15] funct3[14:12] rd[11:7] opcode[6:0]
//   S: imm[11:5] rs2 rs1 funct3 imm[4:0] opcode
//   B: imm[12|10:5] rs2 rs1 funct3 imm[4:1|11] opcode
//   U: imm[31:12] rd opcode
//   J: imm[20|10:1|11|19:12] rd opcode
#define R_type(funct7, rs2, rs1, funct3, rd, opcode)    ((funct7)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode))
#define I_type(imm12, rs1, funct3, rd, opcode)          ((imm12)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode))
#define S_type(imm12, rs2, rs1, funct3, opcode)         (((imm12)>>5)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | ((imm12)&31)<<7 | (opcode))
// rs2 sits at bit 20 and funct3 at bit 12, exactly like in the S format
#define B_type(imm13, rs2, rs1, funct3, opcode)         ((((imm13)>>12)&1)<<31 | (((imm13)>>5)&63)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (((imm13)>>1)&15)<<8 | (((imm13)>>11)&1)<<7 | (opcode))
// imm32 is the already-positioned immediate; only its bits [31:12] are kept
#define U_type(imm32, rd, opcode)                       (((imm32)>>12)<<12 | (rd)<<7 | (opcode))
#define J_type(imm21, rd, opcode)                       ((((imm21)>>20)&1)<<31 | (((imm21)>>1)&0b1111111111)<<21 | (((imm21)>>11)&1)<<20 | (((imm21)>>12)&0b11111111)<<12 | (rd)<<7 | (opcode))

// RV32I
// put imm20 in the [31:12] bits of rd, zero [11:0] and sign extend bits31
#define LUI(rd, imm20)              EMIT(U_type((imm20)<<12, rd, 0b0110111))
// put PC+imm20 in rd (imm20 is moved to bits [31:12], same as LUI)
#define AUIPC(rd, imm20)            EMIT(U_type((imm20)<<12, rd, 0b0010111))

#define JAL_gen(rd, imm21)          J_type(imm21, rd, 0b1101111)
// Unconditional branch, no return address set
#define B(imm21)                    EMIT(JAL_gen(xZR, imm21))
// Unconditional branch, return set to xRA
#define JAL(imm21)                  EMIT(JAL_gen(xRA, imm21))

#define JALR_gen(rd, rs1, imm12)    I_type(imm12, rs1, 0b000, rd, 0b1100111)
// Unconditional branch to r, no return address set
#define BR(r)                       EMIT(JALR_gen(xZR, r, 0))
// Unconditional branch to r+i12, no return address set
#define BR_I12(r, imm12)            EMIT(JALR_gen(xZR, r, (imm12)&0b111111111111))
// Unconditional branch to r, return address set to xRA
#define JALR(r)                     EMIT(JALR_gen(xRA, r, 0))
// Unconditional branch to r+i12, return address set to xRA
#define JALR_I12(r, imm12)          EMIT(JALR_gen(xRA, r, (imm12)&0b111111111111))

// rd = rs1 + imm12
#define ADDI(rd, rs1, imm12)        EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, 0b0010011))
// rd = rs1 - imm12 (pseudo instruction)
#define SUBI(rd, rs1, imm12)        EMIT(I_type((-(imm12))&0b111111111111, rs1, 0b000, rd, 0b0010011))
// rd = (rs1<imm12)?1:0
#define SLTI(rd, rs1, imm12)        EMIT(I_type((imm12)&0b111111111111, rs1, 0b010, rd, 0b0010011))
// rd = (rs1<imm12)?1:0 unsigned
#define SLTIU(rd, rs1, imm12)       EMIT(I_type((imm12)&0b111111111111, rs1, 0b011, rd, 0b0010011))
// rd = rs1 ^ imm12
#define XORI(rd, rs1, imm12)        EMIT(I_type((imm12)&0b111111111111, rs1, 0b100, rd, 0b0010011))
// rd = rs1 | imm12
#define ORI(rd, rs1, imm12)         EMIT(I_type((imm12)&0b111111111111, rs1, 0b110, rd, 0b0010011))
// rd = rs1 & imm12 (funct3 is 0b111; 0b101 is the shift-right encoding)
#define ANDI(rd, rs1, imm12)        EMIT(I_type((imm12)&0b111111111111, rs1, 0b111, rd, 0b0010011))

// rd = imm12
#define MOV_U12(rd, imm12)          ADDI(rd, xZR, imm12)
// nop
#define NOP()                       ADDI(xZR, xZR, 0)

// rd = rs1 + rs2
#define ADD(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0110011))
// rd = rs1 - rs2
#define SUB(rd, rs1, rs2)           EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0110011))
// rd = rs1<<rs2
#define SLL(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0110011))
// rd = (rs1<rs2)?1:0
#define SLT(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b010, rd, 0b0110011))
// rd = (rs1<rs2)?1:0 Unsigned
#define SLTU(rd, rs1, rs2)          EMIT(R_type(0b0000000, rs2, rs1, 0b011, rd, 0b0110011))
// rd = rs1 ^ rs2
#define XOR(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b100, rd, 0b0110011))
// rd = rs1>>rs2 logical
#define SRL(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, 0b0110011))
// rd = rs1>>rs2 arithmetic
#define SRA(rd, rs1, rs2)           EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0110011))
// rd = rs1 | rs2
#define OR(rd, rs1, rs2)            EMIT(R_type(0b0000000, rs2, rs1, 0b110, rd, 0b0110011))
// rd = rs1 & rs2
#define AND(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b111, rd, 0b0110011))

// rd = rs1 (pseudo instruction)
#define MV(rd, rs1)                 ADDI(rd, rs1, 0)
// rd = rs1 (pseudo instruction); when rex.w is not set only the low 32 bits are kept (AND with xMASK)
#define MVxw(rd, rs1)               if(rex.w) {MV(rd, rs1); } else {AND(rd, rs1, xMASK);}
// rd = !rs1
#define NOT(rd, rs1)                XORI(rd, rs1, -1)
// rd = -rs1
#define NEG(rd, rs1)                SUB(rd, xZR, rs1)

// rd = 4-bytes[rs1+imm12] signed extended
#define LW(rd, rs1, imm12)          EMIT(I_type(imm12, rs1, 0b010, rd, 0b0000011))
// rd = 2-bytes[rs1+imm12] signed extended
#define LH(rd, rs1, imm12)          EMIT(I_type(imm12, rs1, 0b001, rd, 0b0000011))
// rd = byte[rs1+imm12] signed extended
#define LB(rd, rs1, imm12)          EMIT(I_type(imm12, rs1, 0b000, rd, 0b0000011))
// rd = 2-bytes[rs1+imm12] zero extended
#define LHU(rd, rs1, imm12)         EMIT(I_type(imm12, rs1, 0b101, rd, 0b0000011))
// rd = byte[rs1+imm12] zero extended
#define LBU(rd, rs1, imm12)         EMIT(I_type(imm12, rs1, 0b100, rd, 0b0000011))
// byte[rs1+imm12] = rs2
#define SB(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b000, 0b0100011))
// 2-bytes[rs1+imm12] = rs2
#define SH(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b001, 0b0100011))
// 4-bytes[rs1+imm12] = rs2
#define SW(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b010, 0b0100011))

// pred goes to bits [27:24], succ to bits [23:20]
#define FENCE_gen(pred, succ)       (((pred)<<24) | ((succ)<<20) | 0b0001111)
#define FENCE()                     EMIT(FENCE_gen(3, 3))

#define FENCE_I_gen()               ((0b001<<12) | 0b0001111)
#define FENCE_I()                   EMIT(FENCE_I_gen())

// RV64I
// rd = 4-bytes[rs1+imm12] (funct3 0b110, the LWU encoding)
#define LWU(rd, rs1, imm12)         EMIT(I_type(imm12, rs1, 0b110, rd, 0b0000011))

// rd = [rs1 + imm12]
#define LD(rd, rs1, imm12)          EMIT(I_type(imm12, rs1, 0b011, rd, 0b0000011))
// [rs1 + imm12] = rs2
#define SD(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b011, 0b0100011))
// [rs1 + imm12] = rs2; funct3 is 0b010 (SW) plus rex.w, i.e. 0b011 (SD) when rex.w is 1
#define SDxw(rs2, rs1, imm12)       EMIT(S_type(imm12, rs2, rs1, 0b010+rex.w, 0b0100011))

// Shift Left Immediate
#define SLLI(rd, rs1, imm6)         EMIT(I_type(imm6, rs1, 0b001, rd, 0b0010011))
// Shift Right Logical Immediate
#define SRLI(rd, rs1, imm6)         EMIT(I_type(imm6, rs1, 0b101, rd, 0b0010011))
// Shift Right Arithmetic Immediate (bit 10 of the immediate field selects the arithmetic form)
#define SRAI(rd, rs1, imm6)         EMIT(I_type((imm6)|(0b010000<<6), rs1, 0b101, rd, 0b0010011))
//riscv epilog for dynarec
//Write the guest registers back to emu, restore the saved host registers and return
//called with pointer to emu as 1st parameter (a0)

.text
.align 4

.global rv64_epilog
rv64_epilog:
    //update register -> emu
    sd      x16, (a0)
    sd      x17, 8(a0)
    sd      x18, 16(a0)
    sd      x19, 24(a0)
    sd      x20, 32(a0)
    sd      x21, 40(a0)
    sd      x22, 48(a0)
    sd      x23, 56(a0)
    sd      x24, 64(a0)
    sd      x25, 72(a0)
    sd      x26, 80(a0)
    sd      x27, 88(a0)
    sd      x28, 96(a0)
    sd      x29, 104(a0)
    sd      x30, 112(a0)
    sd      x31, 120(a0)
    sd      x5, 128(a0)     //xFlags
    sd      x6, 136(a0)     // put back reg value in emu, including RIP (held in x6)
    //restore all used register
    ld      ra, (sp)        // restore ra
    ld      x8, 8(sp)       // restore fp
    ld      x18, 16(sp)
    ld      x19, 24(sp)
    ld      x20, 32(sp)
    ld      x21, 40(sp)
    ld      x22, 48(sp)
    ld      x23, 56(sp)
    ld      x24, 64(sp)
    ld      x25, 72(sp)
    ld      x26, 80(sp)
    ld      x27, 88(sp)
    // reload (not store) the callee-saved FP registers, as rv64_epilog_fast does
    fld     f8, 96(sp)
    fld     f9, 104(sp)
    addi    sp, sp, (8 * 14)    // 14 slots: ra, fp, x18-x27, f8, f9
    //end, return...
    ret


.global rv64_epilog_fast
rv64_epilog_fast:
    // does not write the guest registers back to emu
    //restore all used register
    ld      ra, (sp)        // restore ra
    ld      x8, 8(sp)       // restore fp
    ld      x16, 16(sp)
    ld      x17, 24(sp)
    fld     f8, 32(sp)
    fld     f9, 40(sp)
    addi    sp, sp, (8 * (2+4))
    //end, return...
    ret
// RV64 lock helper
// there is 2 part: read and write
// write return 0 on success, 1 on fail (value has been changed)

.text
.align 4

.global rv64_lock_xchg
.global rv64_lock_xchg_d
.global rv64_lock_storeifnull
.global rv64_lock_storeifnull_d
.global rv64_lock_storeifref
.global rv64_lock_storeifref_d
.global rv64_lock_storeifref2_d
.global rv64_lock_decifnot0b
.global rv64_lock_storeb
.global rv64_lock_incif0
.global rv64_lock_decifnot0
.global rv64_lock_store
.global rv64_lock_cas_d
.global rv64_lock_cas_dd
.global rv64_lock_cas_dq

rv64_lock_xchg:
    // address is a0, value is a1, return old value in a0
    amoswap.d.aqrl  a0, a1, (a0)
    ret

rv64_lock_xchg_d:
    // address is a0, value is a1, return old value in a0
    amoswap.w.aqrl  a0, a1, (a0)
    ret

rv64_lock_storeifnull:
    // address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value
    fence   rw, rw
1:
    lr.d    a2, (a0)
    bnez    a2, 2f
    sc.d    a3, a1, (a0)
    bnez    a3, 1b          // reservation lost, retry
2:
    mv      a0, a2
    ret

rv64_lock_storeifnull_d:
    // address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value
    fence   rw, rw
1:
    lr.w    a2, (a0)
    bnez    a2, 2f
    sc.w    a3, a1, (a0)
    bnez    a3, 1b          // reservation lost, retry
2:
    mv      a0, a2
    ret

rv64_lock_storeifref:
    // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value)
    fence   rw, rw
1:
    lr.d    a3, (a0)
    bne     a2, a3, 2f
    sc.d    a4, a1, (a0)
    bnez    a4, 1b          // reservation lost, retry
    fence   rw, rw
    mv      a0, a1
    ret
2:
    fence   rw, rw
    mv      a0, a3
    ret

rv64_lock_storeifref_d:
    // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value)
    fence   rw, rw
1:
    lr.w    a3, (a0)
    bne     a2, a3, 2f
    sc.w    a4, a1, (a0)
    bnez    a4, 1b          // reservation lost, retry
    mv      a0, a1
    ret
2:
    mv      a0, a3
    ret

rv64_lock_storeifref2_d:
    // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return old [a0] value
    fence   rw, rw
1:
    lr.w    a3, (a0)
    bne     a2, a3, 2f
    sc.w    a4, a1, (a0)
    bnez    a4, 1b          // reservation lost, retry
2:
    mv      a0, a3
    ret

rv64_lock_decifnot0b:
    // address is a0, decrement the low byte of [a0] if it is not 0
    // NOTE(review): the whole 32-bit word is rewritten with only the decremented
    // low byte, so the 3 upper bytes of the word end up cleared -- confirm callers
    // only use this on a word-sized, word-aligned counter.
    fence   rw, rw
1:
    lr.w    a1, (a0)
    andi    a1, a1, 0xff
    beqz    a1, 2f
    addi    a1, a1, -1
    sc.w    a2, a1, (a0)
    bnez    a2, 1b          // reservation lost, retry
2:
    ret

rv64_lock_storeb:
    // address is a0, byte value is a1
    sb      a1, 0(a0)
    fence   rw, rw
    ret

rv64_lock_decifnot0:
    // address is a0, decrement [a0] if it is not 0
    // NOTE(review): a0 returns the decremented value on success (0 when [a0] was
    // already 0), not the old value -- confirm this is what callers expect.
    fence   rw, rw
1:
    lr.w    a1, (a0)
    beqz    a1, 2f
    addi    a1, a1, -1
    sc.w    a2, a1, (a0)
    bnez    a2, 1b          // reservation lost, retry
2:
    mv      a0, a1
    ret

rv64_lock_incif0:
    // address is a0, increment [a0] only if it is 0. return old [a0] value
    fence   rw, rw
1:
    lr.w    a1, (a0)
    bnez    a1, 2f
    addi    a3, a1, 1       // build the new value in a3 so a1 still holds the old one
    sc.w    a2, a3, (a0)
    bnez    a2, 1b          // reservation lost, retry
2:
    mv      a0, a1
    ret

rv64_lock_store:
    // address is a0, 32-bit value is a1
    sw      a1, 0(a0)
    fence   rw, rw
    ret

rv64_lock_cas_d:
    // address is a0, ref is a1, value is a2. a2 stored to [a0] only if [a0] is a1
    // return 0 on success, non-zero on fail (single attempt, no retry)
    lr.w    t0, (a0)
    bne     t0, a1, 1f
    sc.w    a0, a2, (a0)    // a0 = 0 if the store succeeded
    ret
1:
    li      a0, 1
    ret

rv64_lock_cas_dd:
    // address is a0, ref is a1, value is a2. a2 stored to [a0] only if [a0] is a1
    // return 0 on success, non-zero on fail (single attempt, no retry)
    lr.d    t0, (a0)
    bne     t0, a1, 1f
    sc.d    a0, a2, (a0)    // a0 = 0 if the store succeeded
    ret
1:
    li      a0, 1
    ret

rv64_lock_cas_dq:
    // address is a0, ref is a1, val1 is a2, val2 is a3
    // val1 stored to [a0] and val2 to [a0+8], only if [a0] is a1
    // only the first 8 bytes are compared and stored atomically
    // return 0 on success, 1 on fail
    lr.d    t0, (a0)
    bne     t0, a1, 1f
    sc.d    a4, a2, (a0)
    bnez    a4, 1f          // store-conditional failed: do not touch the upper half
    sd      a3, 8(a0)       // second half goes right after the first one
    li      a0, 0
    ret
1:
    li      a0, 1
    ret
#ifndef __RV64_LOCK__H__
#define __RV64_LOCK__H__
#include <stdint.h>

// Declarations of the lock helpers used by the RV64 dynarec.
// The `extern` functions up to rv64_lock_cas_dq are implemented in assembly (rv64_lock.S).

// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
extern int rv64_lock_cas_d(void* p, int32_t ref, int32_t val);

// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
extern int rv64_lock_cas_dd(void* p, int64_t ref, int64_t val);

// Atomically exchange value at [p] with val, return old [p] value
extern uintptr_t rv64_lock_xchg(void* p, uintptr_t val);

// Atomically exchange the 32-bit value at [p] with val, return old [p] value
extern uint32_t rv64_lock_xchg_d(void* p, uint32_t val);

// Atomically store value to [p] only if [p] is NULL. Return old [p] value
extern uint32_t rv64_lock_storeifnull_d(void*p, uint32_t val);

// Atomically store value to [p] only if [p] is NULL. Return old [p] value
extern void* rv64_lock_storeifnull(void*p, void* val);

// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
extern void* rv64_lock_storeifref(void*p, void* val, void* ref);

// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
extern uint32_t rv64_lock_storeifref_d(void*p, uint32_t val, uint32_t ref);

// Atomically store value to [p] only if [p] is ref. Return old [p] value
extern uint32_t rv64_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref);

// Atomically decrement the byte at [p] (but only if [p] is not 0)
extern void rv64_lock_decifnot0b(void*p);

// atomic store of a byte (with memory barrier)
extern void rv64_lock_storeb(void*p, uint8_t b);

// Atomically increment the int at [p] only if it was 0. Return the old value of [p]
extern int rv64_lock_incif0(void*p);

// Atomically decrement the int at [p] (but only if [p] is not 0)
extern int rv64_lock_decifnot0(void*p);

// atomic store of a 32-bit value (with memory barrier)
extern void rv64_lock_store(void*p, uint32_t v);

// (mostly) Atomically store val1 and val2 at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
extern int rv64_lock_cas_dq(void* p, uint64_t ref, uint64_t val1, uint64_t val2);

// Not defined in assembler but in dynarec_rv64_functions
uint8_t extract_byte(uint32_t val, void* address);
uint32_t insert_byte(uint32_t val, uint8_t b, void* address);
uint16_t extract_half(uint32_t val, void* address);
uint32_t insert_half(uint32_t val, uint16_t h, void* address);

// Byte / half-word variants of the exchange and compare-and-swap helpers
uint8_t rv64_lock_xchg_b(void* addr, uint8_t v);
extern int rv64_lock_cas_b(void* p, uint8_t ref, uint8_t val);
extern int rv64_lock_cas_h(void* p, uint16_t ref, uint16_t val);

#endif //__RV64_LOCK__H__
//riscv update linker table for dynarec
//called with pointer to emu as 1st parameter (a0)
//and IP address as 2nd parameter (a1)
//calls LinkNext, then jumps to the address it returned

.text
.align 4

.extern LinkNext

.global rv64_next

    .8byte  0   // NULL pointer before rv64_next, for getDB
rv64_next:
    // emu is a0
    // IP address is a1
    // save on the stack the registers that are reloaded after the call (10 slots of 8 bytes)
    addi    sp,  sp,  -(8 * 10)
    sd      a0, (sp)
    sd      a1, 8(sp)
    sd      x5, 16(sp)
    sd      x6, 24(sp)
    sd      x16, 32(sp)
    sd      x17, 40(sp)
    sd      x28, 48(sp)
    sd      x29, 56(sp)
    sd      x30, 64(sp)
    sd      x31, 72(sp)

    mv      a2, ra      // "from" is in ra, so put in a2
    addi    a3, sp, 24  // a3 is address to change rip (the stack slot where x6 was saved)
    // call the function
1:
    auipc   a4, %pcrel_hi(LinkNext)
    jalr    a4, %pcrel_lo(1b)
    // preserve return value
    mv      a3, a0
    // pop regs (x6 is reloaded from the slot whose address was passed in a3)
    ld      a0, (sp)
    ld      a1, 8(sp)
    ld      x5, 16(sp)
    ld      x6, 24(sp)
    ld      x16, 32(sp)
    ld      x17, 40(sp)
    ld      x28, 48(sp)
    ld      x29, 56(sp)
    ld      x30, 64(sp)
    ld      x31, 72(sp)
    addi    sp,  sp,  (8 * 10)
    // setup xMASK: x7 = 0xffffffff (all ones shifted right by 32)
    xori    x7, x0, -1
    srli    x7, x7, 32
    // return offset is jump address
    jr      a3
/* Decoded view of one RV64 instruction, filled in by the insn_*_read() helpers. */
typedef struct {
    int8_t rd;          /* destination register index */
    int8_t rs1;         /* source register indexes */
    int8_t rs2;
    int8_t rs3;
    int32_t imm;        /* immediate, already sign-extended where the format requires it */
    uint16_t csr;       /* CSR number (bits [31:20] of the instruction) */
    const char *name;   /* mnemonic; always points to a string literal */
    bool rvc;           /* true when decoded from a 16-bit compressed encoding */
    bool f;             /* true when registers are printed with the FP names */
} insn_t;

/* Integer register names; the suffix shows the x86 register mapped on it. */
static const char gpnames[32][9] = {
    "zero", "ra", "sp", "gp", "tp", "t0_flags", "t1_rip", "t2",
    "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
    "a6_rax", "a7_rcx", "s2_rdx", "s3_rbx", "s4_rsp", "s5_rbp", "s6_rsi", "s7_rdi",
    "s8_r8", "s9_r9", "s10_r10", "s11_r11", "t3_r12", "t4_r13", "t5_r14", "t6_r15",
};

/* Floating-point register names. */
static const char fpnames[32][5] = {
    "ft0", "ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7",
    "fs0", "fs1", "fa0", "fa1", "fa2", "fa3", "fa4", "fa5",
    "fa6", "fa7", "fs2", "fs3", "fs4", "fs5", "fs6", "fs7",
    "fs8", "fs9", "fs10", "fs11", "ft8", "ft9", "ft10", "ft11",
};

#define QUADRANT(data) (((data) >>  0) & 0x3 )

/**
 * normal types
*/
#define OPCODE(data) (((data) >>  2) & 0x1f)
#define RD(data)     (((data) >>  7) & 0x1f)
#define RS1(data)    (((data) >> 15) & 0x1f)
#define RS2(data)    (((data) >> 20) & 0x1f)
#define RS3(data)    (((data) >> 27) & 0x1f)
#define FUNCT2(data) (((data) >> 25) & 0x3 )
#define FUNCT3(data) (((data) >> 12) & 0x7 )
#define FUNCT7(data) (((data) >> 25) & 0x7f)
#define IMM116(data) (((data) >> 26) & 0x3f)

/*
 * Sign-extend the low `bits` bits of `value` (1 <= bits <= 31).
 * Done with xor/subtract so no signed value is ever shifted left, which is
 * undefined behaviour in C once the sign bit gets involved.
 */
static inline int32_t insn_sext(uint32_t value, unsigned bits)
{
    const uint32_t sign = (uint32_t)1 << (bits - 1);

    value &= (sign << 1) - 1;
    return (int32_t)(value ^ sign) - (int32_t)sign;
}

/* U-type: imm is bits [31:12] left in place, low 12 bits zero. */
static inline insn_t insn_utype_read(uint32_t data)
{
    return (insn_t) {
        .imm = (int32_t)(data & 0xfffff000u),
        .rd = RD(data),
    };
}

/* I-type: 12-bit signed immediate in bits [31:20]. */
static inline insn_t insn_itype_read(uint32_t data)
{
    return (insn_t) {
        .imm = insn_sext(data >> 20, 12),
        .rs1 = RS1(data),
        .rd = RD(data),
    };
}

/* J-type: 21-bit signed offset scattered as imm[20|10:1|11|19:12]. */
static inline insn_t insn_jtype_read(uint32_t data)
{
    uint32_t imm20   = (data >> 31) & 0x1;
    uint32_t imm101  = (data >> 21) & 0x3ff;
    uint32_t imm11   = (data >> 20) & 0x1;
    uint32_t imm1912 = (data >> 12) & 0xff;

    uint32_t raw = (imm20 << 20) | (imm1912 << 12) | (imm11 << 11) | (imm101 << 1);

    return (insn_t) {
        .imm = insn_sext(raw, 21),
        .rd = RD(data),
    };
}

/* B-type: 13-bit signed offset scattered as imm[12|10:5] ... imm[4:1|11]. */
static inline insn_t insn_btype_read(uint32_t data)
{
    uint32_t imm12  = (data >> 31) & 0x1;
    uint32_t imm105 = (data >> 25) & 0x3f;
    uint32_t imm41  = (data >>  8) & 0xf;
    uint32_t imm11  = (data >>  7) & 0x1;

    uint32_t raw = (imm12 << 12) | (imm11 << 11) | (imm105 << 5) | (imm41 << 1);

    return (insn_t) {
        .imm = insn_sext(raw, 13),
        .rs1 = RS1(data),
        .rs2 = RS2(data),
    };
}

/* R-type: three registers, no immediate. */
static inline insn_t insn_rtype_read(uint32_t data)
{
    return (insn_t) {
        .rs1 = RS1(data),
        .rs2 = RS2(data),
        .rd = RD(data),
    };
}

/* S-type: 12-bit signed immediate split as imm[11:5] and imm[4:0]. */
static inline insn_t insn_stype_read(uint32_t data)
{
    uint32_t imm115 = (data >> 25) & 0x7f;
    uint32_t imm40  = (data >>  7) & 0x1f;

    return (insn_t) {
        .imm = insn_sext((imm115 << 5) | imm40, 12),
        .rs1 = RS1(data),
        .rs2 = RS2(data),
    };
}

/* CSR instructions: CSR number in bits [31:20]. */
static inline insn_t insn_csrtype_read(uint32_t data)
{
    return (insn_t) {
        .csr = data >> 20,
        .rs1 = RS1(data),
        .rd = RD(data),
    };
}

/* Four-register FP format (rs3 in bits [31:27]). */
static inline insn_t insn_fprtype_read(uint32_t data)
{
    return (insn_t) {
        .rs1 = RS1(data),
        .rs2 = RS2(data),
        .rs3 = RS3(data),
        .rd = RD(data),
    };
}

/**
 * compressed types
 *
 * RP1/RP2 are the 3-bit register fields (the readers add 8 to get x8..x15),
 * RC1/RC2 are the full 5-bit register fields.
*/
#define COPCODE(data)     (((data) >> 13) & 0x7 )
#define CFUNCT1(data)     (((data) >> 12) & 0x1 )
#define CFUNCT2LOW(data)  (((data) >>  5) & 0x3 )
#define CFUNCT2HIGH(data) (((data) >> 10) & 0x3 )
#define RP1(data)         (((data) >>  7) & 0x7 )
#define RP2(data)         (((data) >>  2) & 0x7 )
#define RC1(data)         (((data) >>  7) & 0x1f)
#define RC2(data)         (((data) >>  2) & 0x1f)

/* CA: rd' and rs2' (3-bit register fields). */
static inline insn_t insn_catype_read(uint16_t data)
{
    return (insn_t) {
        .rd = RP1(data) + 8,
        .rs2 = RP2(data) + 8,
        .rvc = true,
    };
}

/* CR: full rs1 and rs2. */
static inline insn_t insn_crtype_read(uint16_t data)
{
    return (insn_t) {
        .rs1 = RC1(data),
        .rs2 = RC2(data),
        .rvc = true,
    };
}

/* CI with a 6-bit signed immediate imm[5|4:0]. */
static inline insn_t insn_citype_read(uint16_t data)
{
    uint32_t imm40 = (data >>  2) & 0x1f;
    uint32_t imm5  = (data >> 12) & 0x1;

    return (insn_t) {
        .imm = insn_sext((imm5 << 5) | imm40, 6),
        .rd = RC1(data),
        .rvc = true,
    };
}

/* CI with an unsigned offset laid out as imm[5|4:3|8:6] (scaled by 8). */
static inline insn_t insn_citype_read2(uint16_t data)
{
    uint32_t imm86 = (data >>  2) & 0x7;
    uint32_t imm43 = (data >>  5) & 0x3;
    uint32_t imm5  = (data >> 12) & 0x1;

    int32_t imm = (int32_t)((imm86 << 6) | (imm43 << 3) | (imm5 << 5));

    return (insn_t) {
        .imm = imm,
        .rd = RC1(data),
        .rvc = true,
    };
}

/* CI with a 10-bit signed immediate laid out as imm[9|4|6|8:7|5]. */
static inline insn_t insn_citype_read3(uint16_t data)
{
    uint32_t imm5  = (data >>  2) & 0x1;
    uint32_t imm87 = (data >>  3) & 0x3;
    uint32_t imm6  = (data >>  5) & 0x1;
    uint32_t imm4  = (data >>  6) & 0x1;
    uint32_t imm9  = (data >> 12) & 0x1;

    uint32_t raw = (imm5 << 5) | (imm87 << 7) | (imm6 << 6) | (imm4 << 4) | (imm9 << 9);

    return (insn_t) {
        .imm = insn_sext(raw, 10),
        .rd = RC1(data),
        .rvc = true,
    };
}

/* CI with an unsigned offset laid out as imm[5|4:2|7:6] (scaled by 4). */
static inline insn_t insn_citype_read4(uint16_t data)
{
    uint32_t imm5  = (data >> 12) & 0x1;
    uint32_t imm42 = (data >>  4) & 0x7;
    uint32_t imm76 = (data >>  2) & 0x3;

    int32_t imm = (int32_t)((imm5 << 5) | (imm42 << 2) | (imm76 << 6));

    return (insn_t) {
        .imm = imm,
        .rd = RC1(data),
        .rvc = true,
    };
}

/* CI with an 18-bit signed immediate imm[17|16:12]. */
static inline insn_t insn_citype_read5(uint16_t data)
{
    uint32_t imm1612 = (data >>  2) & 0x1f;
    uint32_t imm17   = (data >> 12) & 0x1;

    return (insn_t) {
        .imm = insn_sext((imm1612 << 12) | (imm17 << 17), 18),
        .rd = RC1(data),
        .rvc = true,
    };
}

/* CB branch: 9-bit signed offset laid out as imm[8|4:3] ... imm[7:6|2:1|5]. */
static inline insn_t insn_cbtype_read(uint16_t data)
{
    uint32_t imm5  = (data >>  2) & 0x1;
    uint32_t imm21 = (data >>  3) & 0x3;
    uint32_t imm76 = (data >>  5) & 0x3;
    uint32_t imm43 = (data >> 10) & 0x3;
    uint32_t imm8  = (data >> 12) & 0x1;

    uint32_t raw = (imm8 << 8) | (imm76 << 6) | (imm5 << 5) | (imm43 << 3) | (imm21 << 1);

    return (insn_t) {
        .imm = insn_sext(raw, 9),
        .rs1 = RP1(data) + 8,
        .rvc = true,
    };
}

/* CB with a 6-bit signed immediate imm[5|4:0] and rd' (3-bit register field). */
static inline insn_t insn_cbtype_read2(uint16_t data)
{
    uint32_t imm40 = (data >>  2) & 0x1f;
    uint32_t imm5  = (data >> 12) & 0x1;

    return (insn_t) {
        .imm = insn_sext((imm5 << 5) | imm40, 6),
        .rd = RP1(data) + 8,
        .rvc = true,
    };
}

/* CS with an unsigned offset laid out as imm[5:3] ... imm[7:6] (scaled by 8). */
static inline insn_t insn_cstype_read(uint16_t data)
{
    uint32_t imm76 = (data >>  5) & 0x3;
    uint32_t imm53 = (data >> 10) & 0x7;

    int32_t imm = (int32_t)((imm76 << 6) | (imm53 << 3));

    return (insn_t) {
        .imm = imm,
        .rs1 = RP1(data) + 8,
        .rs2 = RP2(data) + 8,
        .rvc = true,
    };
}

/* CS with an unsigned offset laid out as imm[5:3] ... imm[2|6] (scaled by 4). */
static inline insn_t insn_cstype_read2(uint16_t data)
{
    uint32_t imm6  = (data >>  5) & 0x1;
    uint32_t imm2  = (data >>  6) & 0x1;
    uint32_t imm53 = (data >> 10) & 0x7;

    int32_t imm = (int32_t)((imm6 << 6) | (imm2 << 2) | (imm53 << 3));

    return (insn_t) {
        .imm = imm,
        .rs1 = RP1(data) + 8,
        .rs2 = RP2(data) + 8,
        .rvc = true,
    };
}

/* CJ: 12-bit signed offset laid out as imm[11|4|9:8|10|6|7|3:1|5]. */
static inline insn_t insn_cjtype_read(uint16_t data)
{
    uint32_t imm5  = (data >>  2) & 0x1;
    uint32_t imm31 = (data >>  3) & 0x7;
    uint32_t imm7  = (data >>  6) & 0x1;
    uint32_t imm6  = (data >>  7) & 0x1;
    uint32_t imm10 = (data >>  8) & 0x1;
    uint32_t imm98 = (data >>  9) & 0x3;
    uint32_t imm4  = (data >> 11) & 0x1;
    uint32_t imm11 = (data >> 12) & 0x1;

    uint32_t raw = (imm5 << 5) | (imm31 << 1) | (imm7 << 7) | (imm6 << 6) |
                   (imm10 << 10) | (imm98 << 8) | (imm4 << 4) | (imm11 << 11);

    return (insn_t) {
        .imm = insn_sext(raw, 12),
        .rvc = true,
    };
}

/* CL with an unsigned offset laid out as imm[5:3] ... imm[2|6] (scaled by 4). */
static inline insn_t insn_cltype_read(uint16_t data)
{
    uint32_t imm6  = (data >>  5) & 0x1;
    uint32_t imm2  = (data >>  6) & 0x1;
    uint32_t imm53 = (data >> 10) & 0x7;

    int32_t imm = (int32_t)((imm6 << 6) | (imm2 << 2) | (imm53 << 3));

    return (insn_t) {
        .imm = imm,
        .rs1 = RP1(data) + 8,
        .rd  = RP2(data) + 8,
        .rvc = true,
    };
}

/* CL with an unsigned offset laid out as imm[5:3] ... imm[7:6] (scaled by 8). */
static inline insn_t insn_cltype_read2(uint16_t data)
{
    uint32_t imm76 = (data >>  5) & 0x3;
    uint32_t imm53 = (data >> 10) & 0x7;

    int32_t imm = (int32_t)((imm76 << 6) | (imm53 << 3));

    return (insn_t) {
        .imm = imm,
        .rs1 = RP1(data) + 8,
        .rd  = RP2(data) + 8,
        .rvc = true,
    };
}

/* CSS with an unsigned offset laid out as imm[5:3|8:6] (scaled by 8). */
static inline insn_t insn_csstype_read(uint16_t data)
{
    uint32_t imm86 = (data >>  7) & 0x7;
    uint32_t imm53 = (data >> 10) & 0x7;

    int32_t imm = (int32_t)((imm86 << 6) | (imm53 << 3));

    return (insn_t) {
        .imm = imm,
        .rs2 = RC2(data),
        .rvc = true,
    };
}

/* CSS with an unsigned offset laid out as imm[5:2|7:6] (scaled by 4). */
static inline insn_t insn_csstype_read2(uint16_t data)
{
    uint32_t imm76 = (data >> 7) & 0x3;
    uint32_t imm52 = (data >> 9) & 0xf;

    int32_t imm = (int32_t)((imm76 << 6) | (imm52 << 2));

    return (insn_t) {
        .imm = imm,
        .rs2 = RC2(data),
        .rvc = true,
    };
}

/* CIW: unsigned immediate laid out as imm[5:4|9:6|2|3], rd' in the low register field. */
static inline insn_t insn_ciwtype_read(uint16_t data)
{
    uint32_t imm3  = (data >>  5) & 0x1;
    uint32_t imm2  = (data >>  6) & 0x1;
    uint32_t imm96 = (data >>  7) & 0xf;
    uint32_t imm54 = (data >> 11) & 0x3;

    int32_t imm = (int32_t)((imm3 << 3) | (imm2 << 2) | (imm96 << 6) | (imm54 << 4));

    return (insn_t) {
        .imm = imm,
        .rd = RP2(data) + 8,
        .rvc = true,
    };
}
// Name of register field `r` of the current `insn`, taken from the FP table
// when insn.f is set and from the integer table otherwise.
#define RN(r) (insn.f ? fpnames[insn.r] : gpnames[insn.r])

// Each PRINT_xxx() formats the current `insn` into `buff` and returns it from the
// enclosing function. They are wrapped in do { } while (0) so that they behave as a
// single statement; every use must be followed by ';'.
#define PRINT_none()            do { snprintf(buff, sizeof(buff), "%s", insn.name); return buff; } while (0)
#define PRINT_rd_rs1_rs2()      do { snprintf(buff, sizeof(buff), "%s\t%s, %s, %s", insn.name, RN(rd), RN(rs1), RN(rs2)); return buff; } while (0)
#define PRINT_rd_rs1_rs2_rs3()  do { snprintf(buff, sizeof(buff), "%s\t%s, %s, %s, %s", insn.name, RN(rd), RN(rs1), RN(rs2), RN(rs3)); return buff; } while (0)
#define PRINT_rd_rs1_imm()      do { snprintf(buff, sizeof(buff), "%s\t%s, %s, %d", insn.name, RN(rd), RN(rs1), insn.imm); return buff; } while (0)
// %x takes an unsigned argument, hence the cast
#define PRINT_rd_rs1_immx()     do { snprintf(buff, sizeof(buff), "%s\t%s, %s, 0x%x", insn.name, RN(rd), RN(rs1), (uint32_t)insn.imm); return buff; } while (0)
#define PRINT_rd_imm_rs1()      do { snprintf(buff, sizeof(buff), "%s\t%s, %d(%s)", insn.name, RN(rd), insn.imm, RN(rs1)); return buff; } while (0)
#define PRINT_rs2_imm_rs1()     do { snprintf(buff, sizeof(buff), "%s\t%s, %d(%s)", insn.name, RN(rs2), insn.imm, RN(rs1)); return buff; } while (0)
#define PRINT_rd_imm()          do { snprintf(buff, sizeof(buff), "%s\t%s, %d", insn.name, RN(rd), insn.imm); return buff; } while (0)
#define PRINT_rd_immx()         do { snprintf(buff, sizeof(buff), "%s\t%s, 0x%x", insn.name, RN(rd), (uint32_t)insn.imm); return buff; } while (0)
#define PRINT_rs1_rs2_imm()     do { snprintf(buff, sizeof(buff), "%s\t%s, %s, %d", insn.name, RN(rs1), RN(rs2), insn.imm); return buff; } while (0)
// Mixed register files: the table is chosen explicitly instead of through insn.f
#define PRINT_fd_fs1()          do { snprintf(buff, sizeof(buff), "%s\t%s, %s", insn.name, fpnames[insn.rd], fpnames[insn.rs1]); return buff; } while (0)
#define PRINT_xd_fs1()          do { snprintf(buff, sizeof(buff), "%s\t%s, %s", insn.name, gpnames[insn.rd], fpnames[insn.rs1]); return buff; } while (0)
#define PRINT_fd_xs1()          do { snprintf(buff, sizeof(buff), "%s\t%s, %s", insn.name, fpnames[insn.rd], gpnames[insn.rs1]); return buff; } while (0)

// TODO: display csr name
// One conversion per argument: rd is a name (%s), the csr number is printed as an
// integer, then either the rs1 name or the unsigned immediate.
#define PRINT_rd_csr_rs1()      do { snprintf(buff, sizeof(buff), "%s\t%s, %d, %s", insn.name, RN(rd), insn.csr, RN(rs1)); return buff; } while (0)
#define PRINT_rd_csr_uimm()     do { snprintf(buff, sizeof(buff), "%s\t%s, %d, %u", insn.name, RN(rd), insn.csr, (uint32_t)insn.imm); return buff; } while (0)
{0}; + + insn_t insn = { 0 }; + uint32_t quadrant = QUADRANT(data); + switch (quadrant) { + case 0x0: { + uint32_t copcode = COPCODE(data); + + switch (copcode) { + case 0x0: /* C.ADDI4SPN */ + insn = insn_ciwtype_read(data); + insn.rs1 = 2; /* sp */ + insn.name = "addi"; + assert(insn.imm != 0); + PRINT_rd_rs1_imm(); + case 0x1: /* C.FLD */ + insn = insn_cltype_read2(data); + insn.name = "fld"; + insn.f = true; + PRINT_rd_imm_rs1(); + case 0x2: /* C.LW */ + insn = insn_cltype_read(data); + insn.name = "lw"; + PRINT_rd_imm_rs1(); + case 0x3: /* C.LD */ + insn = insn_cltype_read2(data); + insn.name = "ld"; + PRINT_rd_imm_rs1(); + case 0x5: /* C.FSD */ + insn = insn_cstype_read(data); + insn.name = "fsd"; + insn.f = true; + PRINT_rs2_imm_rs1(); + case 0x6: /* C.SW */ + insn = insn_cstype_read2(data); + insn.name = "sw"; + PRINT_rd_imm_rs1(); + case 0x7: /* C.SD */ + insn = insn_cstype_read(data); + insn.name = "sd"; + PRINT_rs2_imm_rs1(); + } + } + case 0x1: { + uint32_t copcode = COPCODE(data); + + switch (copcode) { + case 0x0: /* C.ADDI */ + insn = insn_citype_read(data); + insn.rs1 = insn.rd; + insn.name = "addi"; + PRINT_rd_rs1_imm(); + case 0x1: /* C.ADDIW */ + insn = insn_citype_read(data); + assert(insn.rd != 0); + insn.rs1 = insn.rd; + insn.name = "addiw"; + PRINT_rd_rs1_imm(); + case 0x2: /* C.LI */ + insn = insn_citype_read(data); + insn.rs1 = 0; + insn.name = "addi"; + PRINT_rd_rs1_imm(); + case 0x3: { + int32_t rd = RC1(data); + if (rd == 2) { /* C.ADDI16SP */ + insn = insn_citype_read3(data); + assert(insn.imm != 0); + insn.rs1 = insn.rd; + insn.name = "addi"; + PRINT_rd_rs1_imm(); + } else { /* C.LUI */ + insn = insn_citype_read5(data); + assert(insn.imm != 0); + insn.name = "lui"; + PRINT_rd_immx(); + } + } + case 0x4: { + uint32_t cfunct2high = CFUNCT2HIGH(data); + + switch (cfunct2high) { + case 0x0: /* C.SRLI */ + case 0x1: /* C.SRAI */ + case 0x2: { /* C.ANDI */ + insn = insn_cbtype_read2(data); + insn.rs1 = insn.rd; + + if (cfunct2high == 0x0) { 
+ insn.name = "srli"; + } else if (cfunct2high == 0x1) { + insn.name = "srai"; + } else { + insn.name = "andi"; + } + PRINT_rd_rs1_imm(); + } + case 0x3: { + uint32_t cfunct1 = CFUNCT1(data); + + switch (cfunct1) { + case 0x0: { + uint32_t cfunct2low = CFUNCT2LOW(data); + + insn = insn_catype_read(data); + insn.rs1 = insn.rd; + + switch (cfunct2low) { + case 0x0: /* C.SUB */ + insn.name = "sub"; + break; + case 0x1: /* C.XOR */ + insn.name = "xor"; + break; + case 0x2: /* C.OR */ + insn.name = "or"; + break; + case 0x3: /* C.AND */ + insn.name = "and"; + break; + } + break; + } + case 0x1: { + uint32_t cfunct2low = CFUNCT2LOW(data); + + insn = insn_catype_read(data); + insn.rs1 = insn.rd; + + switch (cfunct2low) { + case 0x0: /* C.SUBW */ + insn.name = "subw"; + break; + case 0x1: /* C.ADDW */ + insn.name = "addw"; + break; + } + break; + } + } + PRINT_rd_rs1_rs2(); + } + } + } + case 0x5: /* C.J */ + insn = insn_cjtype_read(data); + insn.rd = 0; + insn.name = "jal"; + PRINT_rd_imm(); + case 0x6: /* C.BEQZ */ + case 0x7: /* C.BNEZ */ + insn = insn_cbtype_read(data); + insn.rs2 = 0; + insn.name = copcode == 0x6 ? 
"beq" : "bne"; + PRINT_rs1_rs2_imm(); + } + } + case 0x2: { + uint32_t copcode = COPCODE(data); + switch (copcode) { + case 0x0: /* C.SLLI */ + insn = insn_citype_read(data); + insn.rs1 = insn.rd; + insn.name = "slli"; + PRINT_rd_rs1_imm(); + case 0x1: /* C.FLDSP */ + insn = insn_citype_read2(data); + insn.rs1 = 2; /* sp */ + insn.f = true; + insn.name = "fld"; + PRINT_rd_imm_rs1(); + case 0x2: /* C.LWSP */ + insn = insn_citype_read4(data); + assert(insn.rd != 0); + insn.rs1 = 2; /* sp */ + insn.name = "lw"; + PRINT_rd_imm_rs1(); + case 0x3: /* C.LDSP */ + insn = insn_citype_read2(data); + assert(insn.rd != 0); + insn.rs1 = 2; /* sp */ + insn.name = "ld"; + PRINT_rd_imm_rs1(); + case 0x4: { + uint32_t cfunct1 = CFUNCT1(data); + + switch (cfunct1) { + case 0x0: { + insn = insn_crtype_read(data); + + if (insn.rs2 == 0) { /* C.JR */ + assert(insn.rs1 != 0); + insn.rd = 0; + insn.name = "jalr"; + PRINT_rd_imm_rs1(); + } else { /* C.MV */ + insn.rd = insn.rs1; + insn.rs1 = 0; + insn.name = "add"; + PRINT_rd_rs1_rs2(); + } + } + case 0x1: { + insn = insn_crtype_read(data); + if (insn.rs1 == 0 && insn.rs2 == 0) { /* C.EBREAK */ + insn.name = "ebreak"; + PRINT_none(); + } else if (insn.rs2 == 0) { /* C.JALR */ + insn.rd = 1; /* ra */ + insn.name = "jalr"; + PRINT_rd_imm_rs1(); + } else { /* C.ADD */ + insn.rd = insn.rs1; + insn.name = "add"; + PRINT_rd_rs1_rs2(); + } + } + } + } + case 0x5: /* C.FSDSP */ + insn = insn_csstype_read(data); + insn.rs1 = 2; /* sp */ + insn.f = true; + insn.name = "fsd"; + PRINT_rs2_imm_rs1(); + case 0x6: /* C.SWSP */ + insn = insn_csstype_read2(data); + insn.rs1 = 2; /* sp */ + insn.name = "sw"; + PRINT_rs2_imm_rs1(); + case 0x7: /* C.SDSP */ + insn = insn_csstype_read(data); + insn.rs1 = 2; /* sp */ + insn.name = "sd"; + PRINT_rs2_imm_rs1(); + } + } + case 0x3: { + uint32_t opcode = OPCODE(data); + switch (opcode) { + case 0x0: { + uint32_t funct3 = FUNCT3(data); + + insn = insn_itype_read(data); + switch (funct3) { + case 0x0: /* LB */ + 
insn.name = "lb"; + break; + case 0x1: /* LH */ + insn.name = "lh"; + break; + case 0x2: /* LW */ + insn.name = "lw"; + break; + case 0x3: /* LD */ + insn.name = "ld"; + break; + case 0x4: /* LBU */ + insn.name = "lbu"; + break; + case 0x5: /* LHU */ + insn.name = "lhu"; + break; + case 0x6: /* LWU */ + insn.name = "lwu"; + break; + } + PRINT_rd_imm_rs1(); + } + case 0x1: { + uint32_t funct3 = FUNCT3(data); + + insn = insn_itype_read(data); + switch (funct3) { + case 0x2: /* FLW */ + insn.name = "flw"; + insn.f = true; + break; + case 0x3: /* FLD */ + insn.name = "fld"; + insn.f = true; + break; + } + PRINT_rd_imm_rs1(); + } + case 0x3: { + uint32_t funct3 = FUNCT3(data); + + switch (funct3) { + case 0x0: { /* FENCE */ + insn.name = "fence"; + // TODO: handle pred succ + PRINT_none(); + } + case 0x1: { /* FENCE.I */ + insn.name = "fence.i"; + PRINT_none(); + } + } + } + case 0x4: { + int hex = 0; + uint32_t funct3 = FUNCT3(data); + + insn = insn_itype_read(data); + switch (funct3) { + case 0x0: /* ADDI */ + insn.name = "addi"; + break; + case 0x1: { + uint32_t imm116 = IMM116(data); + if (imm116 == 0) { /* SLLI */ + insn.name = "slli"; + } + break; + } + case 0x2: /* SLTI */ + insn.name = "slti"; + break; + case 0x3: /* SLTIU */ + insn.name = "sltiu"; + break; + case 0x4: /* XORI */ + insn.name = "xori"; + hex = 1; + break; + case 0x5: { + uint32_t imm116 = IMM116(data); + + if (imm116 == 0x0) { /* SRLI */ + insn.name = "srli"; + } else if (imm116 == 0x10) { /* SRAI */ + insn.name = "srai"; + } + break; + } + case 0x6: /* ORI */ + insn.name = "ori"; + hex = 1; + break; + case 0x7: /* ANDI */ + insn.name = "andi"; + hex = 1; + break; + } + if(hex) { + PRINT_rd_rs1_immx(); + } else { + PRINT_rd_rs1_imm(); + } + } + case 0x5: /* AUIPC */ + insn = insn_utype_read(data); + insn.name = "auipc"; + PRINT_rd_imm(); + case 0x6: { + uint32_t funct3 = FUNCT3(data); + uint32_t funct7 = FUNCT7(data); + + insn = insn_itype_read(data); + + switch (funct3) { + case 0x0: /* ADDIW */ 
+ insn.name = "addiw"; + break; + case 0x1: /* SLLIW */ + assert(funct7 == 0); + insn.name = "slliw"; + break; + case 0x5: { + switch (funct7) { + case 0x0: /* SRLIW */ + insn.name = "srliw"; + break; + case 0x20: /* SRAIW */ + insn.name = "sraiw"; + break; + } + } + } + PRINT_rd_rs1_rs2(); + } + case 0x8: { + uint32_t funct3 = FUNCT3(data); + + insn = insn_stype_read(data); + switch (funct3) { + case 0x0: /* SB */ + insn.name = "sb"; + break; + case 0x1: /* SH */ + insn.name = "sh"; + break; + case 0x2: /* SW */ + insn.name = "sw"; + break; + case 0x3: /* SD */ + insn.name = "sd"; + break; + } + PRINT_rs2_imm_rs1(); + } + case 0x9: { + uint32_t funct3 = FUNCT3(data); + + insn = insn_stype_read(data); + switch (funct3) { + case 0x2: /* FSW */ + insn.name = "fsw"; + insn.f = true; + break; + case 0x3: /* FSD */ + insn.name = "fsd"; + insn.f = true; + break; + } + PRINT_rs2_imm_rs1(); + } + case 0xc: { + insn = insn_rtype_read(data); + + uint32_t funct3 = FUNCT3(data); + uint32_t funct7 = FUNCT7(data); + + switch (funct7) { + case 0x0: { + switch (funct3) { + case 0x0: /* ADD */ + insn.name = "add"; + break; + case 0x1: /* SLL */ + insn.name = "sll"; + break; + case 0x2: /* SLT */ + insn.name = "slt"; + break; + case 0x3: /* SLTU */ + insn.name = "sltu"; + break; + case 0x4: /* XOR */ + insn.name = "xor"; + break; + case 0x5: /* SRL */ + insn.name = "srl"; + break; + case 0x6: /* OR */ + insn.name = "or"; + break; + case 0x7: /* AND */ + insn.name = "and"; + break; + } + } + break; + case 0x1: { + switch (funct3) { + case 0x0: /* MUL */ + insn.name = "mul"; + break; + case 0x1: /* MULH */ + insn.name = "mulh"; + break; + case 0x2: /* MULHSU */ + insn.name = "mulhsu"; + break; + case 0x3: /* MULHU */ + insn.name = "mulhu"; + break; + case 0x4: /* DIV */ + insn.name = "div"; + break; + case 0x5: /* DIVU */ + insn.name = "divu"; + break; + case 0x6: /* REM */ + insn.name = "rem"; + break; + case 0x7: /* REMU */ + insn.name = "remu"; + break; + } + } + break; + case 
0x20: { + switch (funct3) { + case 0x0: /* SUB */ + insn.name = "sub"; + break; + case 0x5: /* SRA */ + insn.name = "sra"; + break; + } + } + break; + } + PRINT_rd_rs1_rs2(); + } + case 0xd: /* LUI */ + insn = insn_utype_read(data); + insn.name = "lui"; + PRINT_rd_immx(); + case 0xe: { + insn = insn_rtype_read(data); + + uint32_t funct3 = FUNCT3(data); + uint32_t funct7 = FUNCT7(data); + + switch (funct7) { + case 0x0: { + switch (funct3) { + case 0x0: /* ADDW */ + insn.name = "addw"; + break; + case 0x1: /* SLLW */ + insn.name = "sllw"; + break; + case 0x5: /* SRLW */ + insn.name = "srlw"; + break; + } + } + break; + case 0x1: { + switch (funct3) { + case 0x0: /* MULW */ + insn.name = "mulw"; + break; + case 0x4: /* DIVW */ + insn.name = "divw"; + break; + case 0x5: /* DIVUW */ + insn.name = "divuw"; + break; + case 0x6: /* REMW */ + insn.name = "remw"; + break; + case 0x7: /* REMUW */ + insn.name = "remuw"; + break; + } + } + break; + case 0x20: { + switch (funct3) { + case 0x0: /* SUBW */ + insn.name = "subw"; + break; + case 0x5: /* SRAW */ + insn.name = "sraw"; + break; + } + } + break; + } + PRINT_rd_rs1_rs2(); + } + case 0x10: { + uint32_t funct2 = FUNCT2(data); + + insn = insn_fprtype_read(data); + switch (funct2) { + case 0x0: /* FMADD.S */ + insn.name = "fmadd.s"; + insn.f = true; + break; + case 0x1: /* FMADD.D */ + insn.name = "fmadd.d"; + insn.f = true; + break; + } + PRINT_rd_rs1_rs2(); + } + case 0x11: { + uint32_t funct2 = FUNCT2(data); + + insn = insn_fprtype_read(data); + switch (funct2) { + case 0x0: /* FMSUB.S */ + insn.name = "fmsub.s"; + insn.f = true; + break; + case 0x1: /* FMSUB.D */ + insn.name = "fmsub.d"; + insn.f = true; + break; + } + PRINT_rd_rs1_rs2(); + } + case 0x12: { + uint32_t funct2 = FUNCT2(data); + + insn = insn_fprtype_read(data); + switch (funct2) { + case 0x0: /* FNMSUB.S */ + insn.name = "fnmsub.s"; + insn.f = true; + break; + case 0x1: /* FNMSUB.D */ + insn.name = "fnmsub.d"; + insn.f = true; + break; + } + 
PRINT_rd_rs1_rs2_rs3(); + } + case 0x13: { + uint32_t funct2 = FUNCT2(data); + + insn = insn_fprtype_read(data); + switch (funct2) { + case 0x0: /* FNMADD.S */ + insn.name = "fnmadd.s"; + insn.f = true; + break; + case 0x1: /* FNMADD.D */ + insn.name = "fnmadd.d"; + insn.f = true; + break; + } + PRINT_rd_rs1_rs2_rs3(); + } + case 0x14: { + uint32_t funct7 = FUNCT7(data); + + insn = insn_rtype_read(data); + insn.f = true; + switch (funct7) { + case 0x0: /* FADD.S */ + insn.name = "fadd.s"; + PRINT_rd_rs1_rs2(); + case 0x1: /* FADD.D */ + insn.name = "fadd.d"; + PRINT_rd_rs1_rs2(); + case 0x4: /* FSUB.S */ + insn.name = "fsub.s"; + PRINT_rd_rs1_rs2(); + case 0x5: /* FSUB.D */ + insn.name = "fsub.d"; + PRINT_rd_rs1_rs2(); + case 0x8: /* FMUL.S */ + insn.name = "fmul.s"; + PRINT_rd_rs1_rs2(); + case 0x9: /* FMUL.D */ + insn.name = "fmul.d"; + PRINT_rd_rs1_rs2(); + case 0xc: /* FDIV.S */ + insn.name = "fdiv.s"; + PRINT_rd_rs1_rs2(); + case 0xd: /* FDIV.D */ + insn.name = "fdiv.d"; + PRINT_rd_rs1_rs2(); + case 0x10: { + uint32_t funct3 = FUNCT3(data); + + switch (funct3) { + case 0x0: /* FSGNJ.S */ + insn.name = "fsgnj.s"; + break; + case 0x1: /* FSGNJN.S */ + insn.name = "fsgnjn.s"; + break; + case 0x2: /* FSGNJX.S */ + insn.name = "fsgnjx.s"; + break; + } + PRINT_rd_rs1_rs2(); + } + case 0x11: { + uint32_t funct3 = FUNCT3(data); + + switch (funct3) { + case 0x0: /* FSGNJ.D */ + insn.name = "fsgnj.d"; + break; + case 0x1: /* FSGNJN.D */ + insn.name = "fsgnjn.d"; + break; + case 0x2: /* FSGNJX.D */ + insn.name = "fsgnjx.d"; + break; + } + PRINT_rd_rs1_rs2(); + } + case 0x14: { + uint32_t funct3 = FUNCT3(data); + + switch (funct3) { + case 0x0: /* FMIN.S */ + insn.name = "fmin.s"; + break; + case 0x1: /* FMAX.S */ + insn.name = "fmax.s"; + break; + } + PRINT_rd_rs1_rs2(); + } + case 0x15: { + uint32_t funct3 = FUNCT3(data); + + switch (funct3) { + case 0x0: /* FMIN.D */ + insn.name = "fmin.d"; + break; + case 0x1: /* FMAX.D */ + insn.name = "fmax.d"; + break; + } + 
PRINT_rd_rs1_rs2(); + } + case 0x20: /* FCVT.S.D */ + assert(RS2(data) == 1); + insn.name = "fcvt.s.d"; + PRINT_fd_fs1(); + case 0x21: /* FCVT.D.S */ + assert(RS2(data) == 0); + insn.name = "fcvt.d.s"; + PRINT_fd_fs1(); + case 0x2c: /* FSQRT.S */ + assert(insn.rs2 == 0); + insn.name = "fsqrt.s"; + PRINT_fd_fs1(); + case 0x2d: /* FSQRT.D */ + assert(insn.rs2 == 0); + insn.name = "fsqrt.d"; + PRINT_fd_fs1(); + case 0x50: { + uint32_t funct3 = FUNCT3(data); + + switch (funct3) { + case 0x0: /* FLE.S */ + insn.name = "fle.s"; + break; + case 0x1: /* FLT.S */ + insn.name = "flt.s"; + break; + case 0x2: /* FEQ.S */ + insn.name = "feq.s"; + break; + } + PRINT_rd_rs1_rs2(); + } + case 0x51: { + uint32_t funct3 = FUNCT3(data); + + switch (funct3) { + case 0x0: /* FLE.D */ + insn.name = "fle.d"; + break; + case 0x1: /* FLT.D */ + insn.name = "flt.d"; + break; + case 0x2: /* FEQ.D */ + insn.name = "feq.d"; + break; + } + PRINT_rd_rs1_rs2(); + } + case 0x60: { + uint32_t rs2 = RS2(data); + + switch (rs2) { + case 0x0: /* FCVT.W.S */ + insn.name = "fcvt.w.s"; + break; + case 0x1: /* FCVT.WU.S */ + insn.name = "fcvt.wu.s"; + break; + case 0x2: /* FCVT.L.S */ + insn.name = "fcvt.l.s"; + break; + case 0x3: /* FCVT.LU.S */ + insn.name = "fcvt.lu.s"; + break; + } + PRINT_xd_fs1(); + } + case 0x61: { + uint32_t rs2 = RS2(data); + + switch (rs2) { + case 0x0: /* FCVT.W.D */ + insn.name = "fcvt.w.d"; + break; + case 0x1: /* FCVT.WU.D */ + insn.name = "fcvt.wu.d"; + break; + case 0x2: /* FCVT.L.D */ + insn.name = "fcvt.l.d"; + break; + case 0x3: /* FCVT.LU.D */ + insn.name = "fcvt.lu.d"; + break; + } + PRINT_xd_fs1(); + } + case 0x68: { + uint32_t rs2 = RS2(data); + + switch (rs2) { + case 0x0: /* FCVT.S.W */ + insn.name = "fcvt.s.w"; + break; + case 0x1: /* FCVT.S.WU */ + insn.name = "fcvt.s.wu"; + break; + case 0x2: /* FCVT.S.L */ + insn.name = "fcvt.s.l"; + break; + case 0x3: /* FCVT.S.LU */ + insn.name = "fcvt.s.lu"; + break; + } + PRINT_fd_xs1(); + } + case 0x69: { + uint32_t rs2 = 
RS2(data); + + switch (rs2) { + case 0x0: /* FCVT.D.W */ + insn.name = "fcvt.d.w"; + break; + case 0x1: /* FCVT.D.WU */ + insn.name = "fcvt.d.wu"; + break; + case 0x2: /* FCVT.D.L */ + insn.name = "fcvt.d.l"; + break; + case 0x3: /* FCVT.D.LU */ + insn.name = "fcvt.d.lu"; + break; + } + PRINT_xd_fs1(); + } + case 0x70: { + assert(RS2(data) == 0); + uint32_t funct3 = FUNCT3(data); + + switch (funct3) { + case 0x0: /* FMV.X.W */ + insn.name = "fmv.x.w"; + break; + case 0x1: /* FCLASS.S */ + insn.name = "fclass.s"; + break; + } + PRINT_xd_fs1(); + } + case 0x71: { + assert(RS2(data) == 0); + uint32_t funct3 = FUNCT3(data); + + switch (funct3) { + case 0x0: /* FMV.X.D */ + insn.name = "fmv.x.d"; + break; + case 0x1: /* FCLASS.D */ + insn.name = "fclass.d"; + break; + } + PRINT_xd_fs1(); + } + case 0x78: /* FMV_W_X */ + assert(RS2(data) == 0 && FUNCT3(data) == 0); + insn.name = "fmv.w.x"; + PRINT_fd_xs1(); + case 0x79: /* FMV_D_X */ + assert(RS2(data) == 0 && FUNCT3(data) == 0); + insn.name = "fmv.d.x"; + PRINT_fd_xs1(); + } + } + case 0x18: { + insn = insn_btype_read(data); + + uint32_t funct3 = FUNCT3(data); + switch (funct3) { + case 0x0: /* BEQ */ + insn.name = "beq"; + break; + case 0x1: /* BNE */ + insn.name = "bne"; + break; + case 0x4: /* BLT */ + insn.name = "blt"; + break; + case 0x5: /* BGE */ + insn.name = "bge"; + break; + case 0x6: /* BLTU */ + insn.name = "bltu"; + break; + case 0x7: /* BGEU */ + insn.name = "bgeu"; + break; + } + + PRINT_rs1_rs2_imm(); + } + case 0x19: /* JALR */ + insn = insn_itype_read(data); + insn.name = "jalr"; + PRINT_rd_imm_rs1(); + case 0x1b: /* JAL */ + insn = insn_jtype_read(data); + insn.name = "jal"; + PRINT_rd_imm(); + case 0x1c: { + if (data == 0x73) { /* ECALL */ + insn.name = "ecall"; + PRINT_none(); + } + + uint32_t funct3 = FUNCT3(data); + insn = insn_csrtype_read(data); + switch(funct3) { + case 0x1: /* CSRRW */ + insn.name = "csrrw"; + PRINT_rd_csr_rs1(); + case 0x2: /* CSRRS */ + insn.name = "csrrs"; + 
PRINT_rd_csr_rs1(); + case 0x3: /* CSRRC */ + insn.name = "csrrc"; + PRINT_rd_csr_rs1(); + case 0x5: /* CSRRWI */ + insn.name = "csrrwi"; + PRINT_rd_csr_uimm(); + case 0x6: /* CSRRSI */ + insn.name = "csrrsi"; + PRINT_rd_csr_uimm(); + case 0x7: /* CSRRCI */ + insn.name = "csrrci"; + PRINT_rd_csr_uimm(); + } + } + } + } + } + + snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(data)); + return buff; +} diff --git a/src/dynarec/rv64/rv64_printer.h b/src/dynarec/rv64/rv64_printer.h new file mode 100644 index 00000000..e22a208f --- /dev/null +++ b/src/dynarec/rv64/rv64_printer.h @@ -0,0 +1,8 @@ +#ifndef _RV64_PRINTER_H_ +#define _RV64_PRINTER_H_ +#include <stdint.h> +#include<stdbool.h> + +const char* rv64_print(uint32_t data, uint64_t addr); + +#endif //_RV64_PRINTER_H_ diff --git a/src/dynarec/rv64/rv64_prolog.S b/src/dynarec/rv64/rv64_prolog.S new file mode 100644 index 00000000..229910b9 --- /dev/null +++ b/src/dynarec/rv64/rv64_prolog.S @@ -0,0 +1,53 @@ +//arm prologue for dynarec +//Save stuff, prepare stack and register +//called with pointer to emu as 1st parameter +//and address to jump to as 2nd parameter + +.text +.align 4 + +.extern rv64_next + +.global rv64_prolog +rv64_prolog: + //save all 18 used register + addi sp, sp, -(8 * 14) + sd ra, (sp) // save ra + sd x8, 8(sp) // save fp + sd x18, 16(sp) + sd x19, 24(sp) + sd x20, 32(sp) + sd x21, 40(sp) + sd x22, 48(sp) + sd x23, 56(sp) + sd x24, 64(sp) + sd x25, 72(sp) + sd x26, 80(sp) + sd x27, 88(sp) + fsd f8, 96(sp) + fsd f9, 104(sp) + // save f18-f27 too probably + //setup emu -> register + ld x16, (a0) + ld x17, 8(a0) + ld x18, 16(a0) + ld x19, 24(a0) + ld x20, 32(a0) + ld x21, 40(a0) + ld x22, 48(a0) + ld x23, 56(a0) + ld x24, 64(a0) + ld x25, 72(a0) + ld x26, 80(a0) + ld x27, 88(a0) + ld x28, 96(a0) + ld x29, 104(a0) + ld x30, 112(a0) + ld x31, 120(a0) + ld x5, 128(a0) //xFlags + ld x6, 136(a0) // xRIP + // setup xMASK + xori x7, x0, -1 + srli x7, x7, 32 + // jump to block + jalr a1 diff --git 
a/src/emu/x64run.c b/src/emu/x64run.c index 032614da..a2343ca3 100755 --- a/src/emu/x64run.c +++ b/src/emu/x64run.c @@ -424,10 +424,14 @@ x64emurun: GB = EB->byte[0]; EB->byte[0] = tmp8u; } else { + #ifdef USE_CAS + GB = native_lock_xchg_b(EB, GB); + #else do { tmp8u = native_lock_read_b(EB); } while(native_lock_write_b(EB, GB)); GB = tmp8u; + #endif } // dynarec use need it's own mecanism #else @@ -461,10 +465,14 @@ x64emurun: if(rex.w) { GD->q[0] = native_lock_xchg(ED, GD->q[0]); } else { + #ifdef USE_CAS + GD->q[0] = native_lock_xchg(ED, GD->dword[0]); + #else do { tmp32u = native_lock_read_d(ED); } while(native_lock_write_d(ED, GD->dword[0])); GD->q[0] = tmp32u; + #endif } } #else diff --git a/src/emu/x64run66f0.c b/src/emu/x64run66f0.c index 7bf48a47..7b76fe32 100644 --- a/src/emu/x64run66f0.c +++ b/src/emu/x64run66f0.c @@ -34,6 +34,9 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr) int64_t tmp64s; uint64_t tmp64u, tmp64u2; reg64_t *oped, *opgd; + #ifdef USE_CAS + uint64_t tmpcas; + #endif opcode = F8; // REX prefix before the F0 are ignored diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c index 13eed9a2..06465fa1 100644 --- a/src/emu/x64runf0.c +++ b/src/emu/x64runf0.c @@ -36,6 +36,9 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr) int64_t tmp64s; uint64_t tmp64u, tmp64u2; reg64_t *oped, *opgd; + #ifdef USE_CAS + uint64_t tmpcas; + #endif opcode = F8; // REX prefix before the F0 are ignored diff --git a/src/include/dynarec_rv64.h b/src/include/dynarec_rv64.h new file mode 100644 index 00000000..9abb704b --- /dev/null +++ b/src/include/dynarec_rv64.h @@ -0,0 +1,10 @@ +#ifndef __DYNAREC_RV64_H_ +#define __DYNAREC_RV64_H_ + +typedef struct dynablock_s dynablock_t; +typedef struct x64emu_s x64emu_t; + +void CancelBlock64(); +void* FillBlock64(dynablock_t* block, uintptr_t addr); + +#endif //__DYNAREC_RV64_H_ \ No newline at end of file diff --git a/src/libtools/signals.c b/src/libtools/signals.c index 0558a88a..9526f49b 100755 --- 
a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -481,6 +481,25 @@ void copyUCTXreg2Emu(x64emu_t* emu, ucontext_t* p, uintptr_t ip) { emu->regs[_R15].q[0] = p->uc_mcontext.__gregs[30]; emu->ip.q[0] = ip; emu->eflags.x64 = p->uc_mcontext.__gregs[31]; +#elif defined(RV64) + emu->regs[_AX].q[0] = p->uc_mcontext.__gregs[16]; + emu->regs[_CX].q[0] = p->uc_mcontext.__gregs[17]; + emu->regs[_DX].q[0] = p->uc_mcontext.__gregs[18]; + emu->regs[_BX].q[0] = p->uc_mcontext.__gregs[19]; + emu->regs[_SP].q[0] = p->uc_mcontext.__gregs[20]; + emu->regs[_BP].q[0] = p->uc_mcontext.__gregs[21]; + emu->regs[_SI].q[0] = p->uc_mcontext.__gregs[22]; + emu->regs[_DI].q[0] = p->uc_mcontext.__gregs[23]; + emu->regs[_R8].q[0] = p->uc_mcontext.__gregs[24]; + emu->regs[_R9].q[0] = p->uc_mcontext.__gregs[25]; + emu->regs[_R10].q[0] = p->uc_mcontext.__gregs[26]; + emu->regs[_R11].q[0] = p->uc_mcontext.__gregs[27]; + emu->regs[_R12].q[0] = p->uc_mcontext.__gregs[28]; + emu->regs[_R13].q[0] = p->uc_mcontext.__gregs[29]; + emu->regs[_R14].q[0] = p->uc_mcontext.__gregs[30]; + emu->regs[_R15].q[0] = p->uc_mcontext.__gregs[31]; + emu->ip.q[0] = ip; + emu->eflags.x64 = p->uc_mcontext.__gregs[5]; #else #error Unsupported architecture #endif @@ -512,6 +531,13 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void if(db) { frame = (uintptr_t)p->uc_mcontext.__gregs[12+_SP]; } +#elif defined(RV64) + ucontext_t *p = (ucontext_t *)ucntx; + void * pc = (void*)p->uc_mcontext.__gregs[0]; + dynablock_t* db = (dynablock_t*)cur_db;//FindDynablockFromNativeAddress(pc); + if(db) { + frame = (uintptr_t)p->uc_mcontext.__gregs[16+_SP]; + } #else #error Unsupported architecture #endif @@ -604,6 +630,26 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void sigcontext->uc_mcontext.gregs[X64_R15] = p->uc_mcontext.__gregs[30]; sigcontext->uc_mcontext.gregs[X64_RIP] = getX64Address(db, (uintptr_t)pc); } +#elif defined(RV64) + if(db) { + 
sigcontext->uc_mcontext.gregs[X64_RAX] = p->uc_mcontext.__gregs[16]; + sigcontext->uc_mcontext.gregs[X64_RCX] = p->uc_mcontext.__gregs[17]; + sigcontext->uc_mcontext.gregs[X64_RDX] = p->uc_mcontext.__gregs[18]; + sigcontext->uc_mcontext.gregs[X64_RBX] = p->uc_mcontext.__gregs[19]; + sigcontext->uc_mcontext.gregs[X64_RSP] = p->uc_mcontext.__gregs[20]; + sigcontext->uc_mcontext.gregs[X64_RBP] = p->uc_mcontext.__gregs[21]; + sigcontext->uc_mcontext.gregs[X64_RSI] = p->uc_mcontext.__gregs[22]; + sigcontext->uc_mcontext.gregs[X64_RDI] = p->uc_mcontext.__gregs[23]; + sigcontext->uc_mcontext.gregs[X64_R8] = p->uc_mcontext.__gregs[24]; + sigcontext->uc_mcontext.gregs[X64_R9] = p->uc_mcontext.__gregs[25]; + sigcontext->uc_mcontext.gregs[X64_R10] = p->uc_mcontext.__gregs[26]; + sigcontext->uc_mcontext.gregs[X64_R11] = p->uc_mcontext.__gregs[27]; + sigcontext->uc_mcontext.gregs[X64_R12] = p->uc_mcontext.__gregs[28]; + sigcontext->uc_mcontext.gregs[X64_R13] = p->uc_mcontext.__gregs[29]; + sigcontext->uc_mcontext.gregs[X64_R14] = p->uc_mcontext.__gregs[30]; + sigcontext->uc_mcontext.gregs[X64_R15] = p->uc_mcontext.__gregs[31]; + sigcontext->uc_mcontext.gregs[X64_RIP] = getX64Address(db, (uintptr_t)pc); + } #else #error Unsupported architecture #endif @@ -902,6 +948,13 @@ void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx) ejb->emu->xmm[2].u128 = fpsimd->vregs[2]; ejb->emu->xmm[3].u128 = fpsimd->vregs[3]; }*/ +#elif defined(RV64) + /*if(fpsimd) { + ejb->emu->xmm[0].u128 = fpsimd->vregs[0]; + ejb->emu->xmm[1].u128 = fpsimd->vregs[1]; + ejb->emu->xmm[2].u128 = fpsimd->vregs[2]; + ejb->emu->xmm[3].u128 = fpsimd->vregs[3]; + }*/ #else #error Unsupported architecture #endif @@ -1036,6 +1089,14 @@ exit(-1); x64pc = getX64Address(db, (uintptr_t)pc); rsp = (void*)p->uc_mcontext.__gregs[12+_SP]; } +#elif defined(RV64) + if(db && p->uc_mcontext.__gregs[10]>0x10000) { + emu = (x64emu_t*)p->uc_mcontext.__gregs[10]; + } + if(db) { + x64pc = getX64Address(db, 
(uintptr_t)pc); + rsp = (void*)p->uc_mcontext.__gregs[16+_SP]; + } #else #error Unsupported Architecture #endif //arch @@ -1165,6 +1226,18 @@ exit(-1); for (int i=-4; i<4; ++i) { printf_log(log_minimum, "%sRSP%c0x%02x:0x%016lx", (i%4)?" ":"\n", i<0?'-':'+', abs(i)*8, *(uintptr_t*)(rsp+i*8)); } +#elif defined(RV64) + if(db) { + shown_regs = 1; + for (int i=0; i<16; ++i) { + if(!(i%4)) printf_log(log_minimum, "\n"); + printf_log(log_minimum, "%s:0x%016llx ", reg_name[i], p->uc_mcontext.__gregs[16+i]); + } + } + if(rsp!=addr) + for (int i=-4; i<4; ++i) { + printf_log(log_minimum, "%sRSP%c0x%02x:0x%016lx", (i%4)?" ":"\n", i<0?'-':'+', abs(i)*8, *(uintptr_t*)(rsp+i*8)); + } #else #warning TODO #endif @@ -1203,6 +1276,8 @@ void my_sigactionhandler(int32_t sig, siginfo_t* info, void * ucntx) void * pc = (void*)p->uc_mcontext.pc; #elif defined(LA464) void * pc = (void*)p->uc_mcontext.__pc; + #elif defined(RV64) + void * pc = (void*)p->uc_mcontext.__gregs[0]; #else #error Unsupported architecture #endif diff --git a/src/main.c b/src/main.c index 8f956361..7a0c2b1b 100755 --- a/src/main.c +++ b/src/main.c @@ -353,6 +353,9 @@ HWCAP2_ECV #elif defined(LA464) printf_log(LOG_INFO, "Dynarec for LoongArch"); printf_log(LOG_INFO, " PageSize:%zd ", box64_pagesize); +#elif defined(RV64) + printf_log(LOG_INFO, "Dynarec for RISC-V"); + printf_log(LOG_INFO, " PageSize:%zd ", box64_pagesize); #else #error Unsupported architecture #endif |