diff options
| author | Haichen Wu <www.wxmqq@gmail.com> | 2024-02-28 23:10:13 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2024-02-28 16:10:13 +0100 |
| commit | bdcfd697950197691eff44794d6e8a5f0e1b9a54 (patch) | |
| tree | 6d557fa946fe5550b4e1215c15e3ed92477acfd6 /src | |
| parent | dbe1bb0ec47b1857d267029a5a8f497829f9f2ad (diff) | |
| download | box64-bdcfd697950197691eff44794d6e8a5f0e1b9a54.tar.gz box64-bdcfd697950197691eff44794d6e8a5f0e1b9a54.zip | |
LARCH64 dynarec (#1295)
* LARCH64_DYNAREC dynarec * [LARCH64_DYNAREC] Change mapping for the registers
Diffstat (limited to 'src')
23 files changed, 2335 insertions, 0 deletions
diff --git a/src/dynarec/la464/dynarec_la464_00.c b/src/dynarec/la464/dynarec_la464_00.c new file mode 100644 index 00000000..28178cb9 --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_00.c @@ -0,0 +1,115 @@
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>
#include <errno.h>
#include <signal.h>

#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "emu/x64run_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "bridge.h"
#include "emu/x64run_private.h"
#include "x64trace.h"
#include "dynarec_native.h"
#include "custommem.h"

#include "la464_printer.h"
#include "dynarec_la464_private.h"
#include "dynarec_la464_functions.h"
#include "dynarec_la464_helper.h"

int isSimpleWrapper(wrapper_t fun);
int isRetX87Wrapper(wrapper_t fun);

// Emit LoongArch64 code for one x64 instruction from the no-prefix (0x00) opcode map.
// addr points just past the opcode byte on entry; returns the address just past the
// decoded x64 instruction. Sets *ok=-1 (via DEFAULT) on unimplemented opcodes.
// Only PUSH/POP reg, MOV Ed,Gd and LEA are implemented at this stage of the port.
uintptr_t dynarec64_00(dynarec_la464_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
{
    uint8_t nextop, opcode;
    uint8_t gd, ed;
    int8_t i8;
    int32_t i32, tmp;
    int64_t i64, j64;
    uint8_t u8;
    uint8_t gb1, gb2, eb1, eb2;
    uint32_t u32;
    uint64_t u64;
    uint8_t wback, wb1, wb2, wb;
    int64_t fixedaddress;
    int unscaled;
    int lock;
    int cacheupd = 0;

    opcode = F8;    // fetch the opcode byte (advances addr)
    // silence unused warnings on passes that don't emit code
    MAYUSE(eb1);
    MAYUSE(eb2);
    MAYUSE(j64);
    MAYUSE(wb);
    MAYUSE(lock);
    MAYUSE(cacheupd);

    switch (opcode) {
        case 0x50:
        case 0x51:
        case 0x52:
        case 0x53:
        case 0x54:
        case 0x55:
        case 0x56:
        case 0x57:
            INST_NAME("PUSH reg");
            // register index is encoded in the low 3 bits, extended by REX.B
            gd = TO_LA464((opcode & 0x07) + (rex.b << 3));
            PUSH1z(gd);
            break;
        case 0x58:
        case 0x59:
        case 0x5A:
        case 0x5B:
        case 0x5C:
        case 0x5D:
        case 0x5E:
        case 0x5F:
            INST_NAME("POP reg");
            gd = TO_LA464((opcode & 0x07) + (rex.b << 3));
            POP1z(gd);
            break;
        case 0x89:
            INST_NAME("MOV Ed, Gd");
            nextop = F8;
            GETGD;
            if (MODREG) { // reg <= reg
                MVxw(TO_LA464((nextop & 7) + (rex.b << 3)), gd);
            } else { // mem <= reg
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
                if (rex.w) {
                    ST_D(gd, ed, fixedaddress);
                } else {
                    ST_W(gd, ed, fixedaddress);
                }
                SMWRITELOCK(lock);  // strong-memory barrier if LOCK/strongmem requires it
            }
            break;
        case 0x8D:
            INST_NAME("LEA Gd, Ed");
            nextop = F8;
            GETGD;
            if (MODREG) { // reg <= reg? that's an invalid operation
                DEFAULT;
            } else { // gd <= effective address (no memory access is performed)
                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0);
                MV(gd, ed);
                if (!rex.w || rex.is32bits) {
                    ZEROUP(gd); // truncate the higher 32bits as asked
                }
            }
            break;
        default:
            DEFAULT;
    }

    return addr;
} \ No newline at end of file diff --git a/src/dynarec/la464/dynarec_la464_functions.c b/src/dynarec/la464/dynarec_la464_functions.c new file mode 100644 index 00000000..2467948f --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_functions.c @@ -0,0 +1,158 @@
#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <string.h>
#include <math.h>
#include <signal.h>
#include <sys/types.h>
#include <unistd.h>

#include "debug.h"
#include "box64context.h"
#include "dynarec.h"
#include "emu/x64emu_private.h"
#include "x64run.h"
#include "x64emu.h"
#include "box64stack.h"
#include "callback.h"
#include "emu/x64run_private.h"
#include "emu/x87emu_private.h"
#include "x64trace.h"
#include "signals.h"
#include "dynarec_la464.h"
#include "dynarec_la464_private.h"
#include "dynarec_la464_functions.h"
#include "custommem.h"
#include "bridge.h"

// Reset scratch regs counter
void fpu_reset_scratch(dynarec_la464_t* dyn)
{
    // TODO
}

// Pass3 debug hook: when BOX64_DYNAREC_DUMP is active, print the decoded x64
// instruction followed by a one-line summary of the dynarec state for this inst
// (emitted size, barriers, flag-tracking state, predecessors, jump target...).
void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex)
{
    if (box64_dynarec_dump) {
        printf_x64_instruction(rex.is32bits ? my_context->dec32 : my_context->dec, &dyn->insts[ninst].x64, name);
        dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d",
            (box64_dynarec_dump > 1) ? "\e[32m" : "",    // green when dump>1
            (void*)(dyn->native_start + dyn->insts[ninst].address),
            dyn->insts[ninst].size / 4,                  // LoongArch opcodes are 4 bytes each
            ninst,
            dyn->insts[ninst].x64.barrier,
            dyn->insts[ninst].x64.state_flags,
            dyn->f.pending,
            dyn->f.dfnone,
            dyn->insts[ninst].x64.may_set ? "may" : "set",
            dyn->insts[ninst].x64.set_flags,
            dyn->insts[ninst].x64.gen_flags,
            dyn->insts[ninst].x64.use_flags,
            dyn->insts[ninst].x64.need_before,
            dyn->insts[ninst].x64.need_after,
            dyn->smread, dyn->smwrite);
        if (dyn->insts[ninst].pred_sz) {
            dynarec_log(LOG_NONE, ", pred=");
            for (int ii = 0; ii < dyn->insts[ninst].pred_sz; ++ii)
                dynarec_log(LOG_NONE, "%s%d", ii ? "/" : "", dyn->insts[ninst].pred[ii]);
        }
        if (dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts >= 0)
            dynarec_log(LOG_NONE, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts);
        if (dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts == -1)
            dynarec_log(LOG_NONE, ", jmp=out");
        if (dyn->last_ip)
            dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip);
        // FPU-cache dump kept from the ARM64 backend; disabled until the LA464 neoncache equivalent exists
        // for (int ii = 0; ii < 24; ++ii) {
        //     switch (dyn->insts[ninst].n.neoncache[ii].t) {
        //         case NEON_CACHE_ST_D: dynarec_log(LOG_NONE, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
        //         case NEON_CACHE_ST_F: dynarec_log(LOG_NONE, " S%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
        //         case NEON_CACHE_ST_I64: dynarec_log(LOG_NONE, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
        //         case NEON_CACHE_MM: dynarec_log(LOG_NONE, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
        //         case NEON_CACHE_XMMW: dynarec_log(LOG_NONE, " Q%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
        //         case NEON_CACHE_XMMR: dynarec_log(LOG_NONE, " Q%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
        //         case NEON_CACHE_SCR: dynarec_log(LOG_NONE, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
        //         case NEON_CACHE_NONE:
        //         default: break;
        //     }
        // }
        // if (dyn->n.stack || dyn->insts[ninst].n.stack_next || dyn->insts[ninst].n.x87stack)
        //     dynarec_log(LOG_NONE, " X87:%d/%d(+%d/-%d)%d", dyn->n.stack, dyn->insts[ninst].n.stack_next, dyn->insts[ninst].n.stack_push, dyn->insts[ninst].n.stack_pop, dyn->insts[ninst].n.x87stack);
        // if (dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2)
        //     dynarec_log(LOG_NONE, " %s:%d/%d", dyn->insts[ninst].n.swapped ? "SWP" : "CMB", dyn->insts[ninst].n.combined1, dyn->insts[ninst].n.combined2);
        dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump > 1) ?
"\e[m" : ""); + } +} + +// CAS +uint8_t extract_byte(uint32_t val, void* address){ + int idx = (((uintptr_t)address)&3)*8; + return (val>>idx)&0xff; +} +uint32_t insert_byte(uint32_t val, uint8_t b, void* address){ + int idx = (((uintptr_t)address)&3)*8; + val&=~(0xff<<idx); + val|=(((uint32_t)b)<<idx); + return val; +} + +// will go badly if address is unaligned +uint16_t extract_half(uint32_t val, void* address){ + int idx = (((uintptr_t)address)&3)*8; + return (val>>idx)&0xffff; +} +uint32_t insert_half(uint32_t val, uint16_t h, void* address){ + int idx = (((uintptr_t)address)&3)*8; + val&=~(0xffff<<idx); + val|=(((uint32_t)h)<<idx); + return val; +} + +uint8_t la464_lock_xchg_b(void* addr, uint8_t val) +{ + uint32_t ret; + uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3); + do { + ret = *aligned; + } while(la464_lock_cas_d(aligned, ret, insert_byte(ret, val, addr))); + return extract_byte(ret, addr); +} + +uint16_t la464_lock_xchg_h(void* addr, uint16_t val) +{ + uint32_t ret; + uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3); + do { + ret = *aligned; + } while(la464_lock_cas_d(aligned, ret, insert_half(ret, val, addr))); + return extract_half(ret, addr); +} + +int la464_lock_cas_b(void* addr, uint8_t ref, uint8_t val) +{ + uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3); + uint32_t tmp = *aligned; + return la464_lock_cas_d(aligned, ref, insert_byte(tmp, val, addr)); +} + +int la464_lock_cas_h(void* addr, uint16_t ref, uint16_t val) +{ + uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3); + uint32_t tmp = *aligned; + return la464_lock_cas_d(aligned, ref, insert_half(tmp, val, addr)); +} + +void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode) +{ + dynarec_log(LOG_NONE, "\t%08x\t%s\n", opcode, la464_print(opcode, (uintptr_t)dyn->block)); +} + +void fpu_reset(dynarec_la464_t* dyn) +{ + // TODO +} + +void fpu_reset_ninst(dynarec_la464_t* dyn, int ninst) +{ + // TODO +} \ No newline at end of file diff --git 
a/src/dynarec/la464/dynarec_la464_functions.h b/src/dynarec/la464/dynarec_la464_functions.h new file mode 100644 index 00000000..6e504f9b --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_functions.h @@ -0,0 +1,19 @@ +#ifndef __DYNAREC_LA464_FUNCTIONS_H__ +#define __DYNAREC_LA464_FUNCTIONS_H__ + +#include "../dynarec_native_functions.h" + +typedef struct x64emu_s x64emu_t; +typedef struct dynarec_rv64_s dynarec_rv64_t; + +// Reset scratch regs counter +void fpu_reset_scratch(dynarec_la464_t* dyn); + +void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex); +void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode); + +// reset the cache +void fpu_reset(dynarec_native_t* dyn); +void fpu_reset_ninst(dynarec_native_t* dyn, int ninst); + +#endif //__DYNAREC_LA464_FUNCTIONS_H__ diff --git a/src/dynarec/la464/dynarec_la464_helper.c b/src/dynarec/la464/dynarec_la464_helper.c new file mode 100644 index 00000000..43ba7499 --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_helper.c @@ -0,0 +1,431 @@ +#include <stdio.h> +#include <stdlib.h> +#include <stddef.h> +#include <errno.h> +#include <assert.h> +#include <string.h> + +#include "debug.h" +#include "box64context.h" +#include "dynarec.h" +#include "emu/x64emu_private.h" +#include "emu/x64run_private.h" +#include "x64run.h" +#include "x64emu.h" +#include "box64stack.h" +#include "callback.h" +#include "emu/x64run_private.h" +#include "x64trace.h" +#include "dynarec_native.h" +#include "../dynablock_private.h" +#include "custommem.h" + +#include "la464_printer.h" +#include "dynarec_la464_private.h" +#include "dynarec_la464_functions.h" +#include "dynarec_la464_helper.h" + +static uintptr_t geted_32(dynarec_la464_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, int* l, int i12); + +/* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be 
added to ed for LDR/STR */ +uintptr_t geted(dynarec_la464_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta) +{ + MAYUSE(dyn); + MAYUSE(ninst); + MAYUSE(delta); + + if (rex.is32bits) + return geted_32(dyn, addr, ninst, nextop, ed, hint, scratch, fixaddress, l, i12); + + int lock = l ? ((l == LOCK_LOCK) ? 1 : 2) : 0; + if (lock == 2) + *l = 0; + uint8_t ret = x2; + *fixaddress = 0; + if (hint > 0) ret = hint; + int maxval = 2047; + if (i12 > 1) + maxval -= i12; + MAYUSE(scratch); + if (!(nextop & 0xC0)) { + if ((nextop & 7) == 4) { + uint8_t sib = F8; + int sib_reg = ((sib >> 3) & 7) + (rex.x << 3); + int sib_reg2 = (sib & 0x7) + (rex.b << 3); + if ((sib & 0x7) == 5) { + int64_t tmp = F32S; + if (sib_reg != 4) { + if (tmp && ((tmp < -2048) || (tmp > maxval) || !i12)) { + MOV64x(scratch, tmp); + ADDSL(ret, scratch, TO_LA464(sib_reg), sib >> 6, ret); + } else { + if (sib >> 6) { + SLLI_D(ret, TO_LA464(sib_reg), (sib >> 6)); + } else { + ret = TO_LA464(sib_reg); + } + *fixaddress = tmp; + } + } else { + switch (lock) { + case 1: addLockAddress(tmp); break; + case 2: + if (isLockAddress(tmp)) *l = 1; + break; + } + MOV64x(ret, tmp); + } + } else { + if (sib_reg != 4) { + ADDSL(ret, TO_LA464(sib_reg2), TO_LA464(sib_reg), sib >> 6, scratch); + } else { + ret = TO_LA464(sib_reg2); + } + } + } else if ((nextop & 7) == 5) { + int64_t tmp = F32S64; + int64_t adj = dyn->last_ip ? 
((addr + delta) - dyn->last_ip) : 0; + if (i12 && adj && (tmp + adj >= -2048) && (tmp + adj <= maxval)) { + ret = xRIP; + *fixaddress = tmp + adj; + } else if (i12 && (tmp >= -2048) && (tmp <= maxval)) { + GETIP(addr + delta); + ret = xRIP; + *fixaddress = tmp; + } else if (adj && (tmp + adj >= -2048) && (tmp + adj <= maxval)) { + ADDI_D(ret, xRIP, tmp + adj); + } else if ((tmp >= -2048) && (tmp <= maxval)) { + GETIP(addr + delta); + ADDI_D(ret, xRIP, tmp); + } else if (tmp + addr + delta < 0x100000000LL) { + MOV64x(ret, tmp + addr + delta); + } else { + if (adj) { + MOV64x(ret, tmp + adj); + } else { + MOV64x(ret, tmp); + GETIP(addr + delta); + } + ADD_D(ret, ret, xRIP); + } + switch (lock) { + case 1: addLockAddress(addr + delta + tmp); break; + case 2: + if (isLockAddress(addr + delta + tmp)) *l = 1; + break; + } + } else { + ret = TO_LA464((nextop & 7) + (rex.b << 3)); + } + } else { + int64_t i64; + uint8_t sib = 0; + int sib_reg = 0; + if ((nextop & 7) == 4) { + sib = F8; + sib_reg = ((sib >> 3) & 7) + (rex.x << 3); + } + int sib_reg2 = (sib & 0x07) + (rex.b << 3); + if (nextop & 0x80) + i64 = F32S; + else + i64 = F8S; + if (i64 == 0 || ((i64 >= -2048) && (i64 <= 2047) && i12)) { + *fixaddress = i64; + if ((nextop & 7) == 4) { + if (sib_reg != 4) { + ADDSL(ret, TO_LA464(sib_reg2), TO_LA464(sib_reg), sib >> 6, scratch); + } else { + ret = TO_LA464(sib_reg2); + } + } else { + ret = TO_LA464((nextop & 0x07) + (rex.b << 3)); + } + } else { + if (i64 >= -2048 && i64 <= 2047) { + if ((nextop & 7) == 4) { + if (sib_reg != 4) { + ADDSL(scratch, TO_LA464(sib_reg2), TO_LA464(sib_reg), sib >> 6, scratch); + } else { + scratch = TO_LA464(sib_reg2); + } + } else { + scratch = TO_LA464((nextop & 0x07) + (rex.b << 3)); + } + ADDI_D(ret, scratch, i64); + } else { + MOV64x(scratch, i64); + if ((nextop & 7) == 4) { + if (sib_reg != 4) { + ADD_D(scratch, scratch, TO_LA464(sib_reg2)); + ADDSL(ret, scratch, TO_LA464(sib_reg), sib >> 6, ret); + } else { + PASS3(int tmp = 
TO_LA464(sib_reg2)); + ADD_D(ret, tmp, scratch); + } + } else { + PASS3(int tmp = TO_LA464((nextop & 0x07) + (rex.b << 3))); + ADD_D(ret, tmp, scratch); + } + } + } + } + *ed = ret; + return addr; +} + +static uintptr_t geted_32(dynarec_la464_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, int* l, int i12) +{ + MAYUSE(dyn); + MAYUSE(ninst); + + int lock = l ? ((l == LOCK_LOCK) ? 1 : 2) : 0; + if (lock == 2) + *l = 0; + uint8_t ret = x2; + *fixaddress = 0; + if (hint > 0) ret = hint; + int maxval = 2047; + if (i12 > 1) + maxval -= i12; + MAYUSE(scratch); + if (!(nextop & 0xC0)) { + if ((nextop & 7) == 4) { + uint8_t sib = F8; + int sib_reg = (sib >> 3) & 0x7; + int sib_reg2 = sib & 0x7; + if (sib_reg2 == 5) { + int64_t tmp = F32S; + if (sib_reg != 4) { + if (tmp && ((tmp < -2048) || (tmp > maxval) || !i12)) { + MOV32w(scratch, tmp); + if ((sib >> 6)) { + SLLI_D(ret, TO_LA464(sib_reg), sib >> 6); + ADD_W(ret, ret, scratch); + } else { + ADD_W(ret, TO_LA464(sib_reg), scratch); + } + } else { + if (sib >> 6) { + SLLI_D(ret, TO_LA464(sib_reg), (sib >> 6)); + } else { + ret = TO_LA464(sib_reg); + } + *fixaddress = tmp; + } + } else { + switch (lock) { + case 1: addLockAddress((int32_t)tmp); break; + case 2: + if (isLockAddress((int32_t)tmp)) *l = 1; + break; + } + MOV32w(ret, tmp); + } + } else { + if (sib_reg != 4) { + if ((sib >> 6)) { + SLLI_D(ret, TO_LA464(sib_reg), (sib >> 6)); + ADD_W(ret, ret, TO_LA464(sib_reg2)); + } else { + ADD_W(ret, TO_LA464(sib_reg2), TO_LA464(sib_reg)); + } + } else { + ret = TO_LA464(sib_reg2); + } + } + } else if ((nextop & 7) == 5) { + uint32_t tmp = F32; + MOV32w(ret, tmp); + switch (lock) { + case 1: addLockAddress(tmp); break; + case 2: + if (isLockAddress(tmp)) *l = 1; + break; + } + } else { + ret = TO_LA464((nextop & 7)); + if (ret == hint) { + MOV32w(x2, 0xffffffff); + AND(hint, ret, x2); // to clear upper part + } + } + } else { + int64_t i32; + uint8_t sib = 0; 
+ int sib_reg = 0; + if ((nextop & 7) == 4) { + sib = F8; + sib_reg = (sib >> 3) & 7; + } + int sib_reg2 = sib & 0x07; + if (nextop & 0x80) + i32 = F32S; + else + i32 = F8S; + if (i32 == 0 || ((i32 >= -2048) && (i32 <= 2047) && i12)) { + *fixaddress = i32; + if ((nextop & 7) == 4) { + if (sib_reg != 4) { + if (sib >> 6) { + SLLI_D(ret, TO_LA464(sib_reg), (sib >> 6)); + ADD_W(ret, ret, TO_LA464(sib_reg2)); + } else { + ADD_W(ret, TO_LA464(sib_reg2), TO_LA464(sib_reg)); + } + } else { + ret = TO_LA464(sib_reg2); + } + } else { + ret = TO_LA464((nextop & 0x07)); + } + } else { + if (i32 >= -2048 && i32 <= 2047) { + if ((nextop & 7) == 4) { + if (sib_reg != 4) { + if (sib >> 6) { + SLLI_D(scratch, TO_LA464(sib_reg), sib >> 6); + ADD_W(scratch, scratch, TO_LA464(sib_reg2)); + } else { + ADD_W(scratch, TO_LA464(sib_reg2), TO_LA464(sib_reg)); + } + } else { + scratch = TO_LA464(sib_reg2); + } + } else { + scratch = TO_LA464((nextop & 0x07)); + } + ADDI_W(ret, scratch, i32); + } else { + MOV32w(scratch, i32); + if ((nextop & 7) == 4) { + if (sib_reg != 4) { + ADD_W(scratch, scratch, TO_LA464(sib_reg2)); + if (sib >> 6) { + SLLI_D(ret, TO_LA464(sib_reg), (sib >> 6)); + ADD_W(ret, ret, scratch); + } else { + ADD_W(ret, scratch, TO_LA464(sib_reg)); + } + } else { + PASS3(int tmp = TO_LA464(sib_reg2)); + ADD_W(ret, tmp, scratch); + } + } else { + PASS3(int tmp = TO_LA464((nextop & 0x07))); + ADD_W(ret, tmp, scratch); + } + } + } + } + *ed = ret; + return addr; +} + +void jump_to_epilog(dynarec_la464_t* dyn, uintptr_t ip, int reg, int ninst) +{ + MAYUSE(dyn); + MAYUSE(ip); + MAYUSE(ninst); + MESSAGE(LOG_DUMP, "Jump to epilog\n"); + + if (reg) { + if (reg != xRIP) { + MV(xRIP, reg); + } + } else { + GETIP_(ip); + } + TABLE64(x2, (uintptr_t)la464_epilog); + SMEND(); + BR(x2); +} + +void jump_to_next(dynarec_la464_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits) +{ + MAYUSE(dyn); + MAYUSE(ninst); + MESSAGE(LOG_DUMP, "Jump to next\n"); + + if (reg) { + if (reg != xRIP) { + 
MV(xRIP, reg); + } + uintptr_t tbl = is32bits ? getJumpTable32() : getJumpTable64(); + MAYUSE(tbl); + TABLE64(x3, tbl); + if (!is32bits) { + SRLI_D(x2, xRIP, JMPTABL_START3); + ALSL_D(x3, x2, x3, 2); + LD_D(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety + } + MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask + SRLI_D(x2, xRIP, JMPTABL_START2 - 3); + AND(x2, x2, x4); + ADD_D(x3, x3, x2); + LD_D(x3, x3, 0); // LR_D(x3, x3, 1, 1); + if (JMPTABLE_MASK2 != JMPTABLE_MASK1) { + MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask + } + SRLI_D(x2, xRIP, JMPTABL_START1 - 3); + AND(x2, x2, x4); + ADD_D(x3, x3, x2); + LD_D(x3, x3, 0); // LR_D(x3, x3, 1, 1); + if (JMPTABLE_MASK0 < 2048) { + ANDI(x2, xRIP, JMPTABLE_MASK0); + } else { + if (JMPTABLE_MASK1 != JMPTABLE_MASK0) { + MOV64x(x4, JMPTABLE_MASK0); // x4 = mask + } + AND(x2, xRIP, x4); + } + ALSL_D(x3, x2, x3, 2); + LD_D(x2, x3, 0); // LR_D(x2, x3, 1, 1); + } else { + uintptr_t p = getJumpTableAddress64(ip); + MAYUSE(p); + TABLE64(x3, p); + GETIP_(ip); + LD_D(x2, x3, 0); // LR_D(x2, x3, 1, 1); + } + if (reg != x1) { + MV(x1, xRIP); + } + CLEARIP(); +#ifdef HAVE_TRACE +// MOVx(x3, 15); no access to PC reg +#endif + SMEND(); + JIRL(xRA, x2, 0x0); // save LR... 
+} + +void call_c(dynarec_la464_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg) +{ + // TODO +} + +void fpu_purgecache(dynarec_la464_t* dyn, int ninst, int next, int s1, int s2, int s3) +{ + // TODO +} + +void fpu_reflectcache(dynarec_la464_t* dyn, int ninst, int s1, int s2, int s3) +{ + // TODO +} + +void fpu_unreflectcache(dynarec_la464_t* dyn, int ninst, int s1, int s2, int s3) +{ + // TODO +} + +void fpu_reset_cache(dynarec_la464_t* dyn, int ninst, int reset_n) +{ + // TODO +} + +// propagate ST stack state, especial stack pop that are deferred +void fpu_propagate_stack(dynarec_la464_t* dyn, int ninst) +{ + // TODO +} diff --git a/src/dynarec/la464/dynarec_la464_helper.h b/src/dynarec/la464/dynarec_la464_helper.h new file mode 100644 index 00000000..4563a2ab --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_helper.h @@ -0,0 +1,246 @@ +#ifndef __DYNAREC_LA464_HELPER_H__ +#define __DYNAREC_LA464_HELPER_H__ + +// undef to get Close to SSE Float->int conversions +// #define PRECISE_CVT + +#if STEP == 0 +#include "dynarec_la464_pass0.h" +#elif STEP == 1 +#include "dynarec_la464_pass1.h" +#elif STEP == 2 +#include "dynarec_la464_pass2.h" +#elif STEP == 3 +#include "dynarec_la464_pass3.h" +#endif + +#include "debug.h" +#include "la464_emitter.h" +#include "../emu/x64primop.h" + +#define F8 *(uint8_t*)(addr++) +#define F8S *(int8_t*)(addr++) +#define F16 *(uint16_t*)(addr += 2, addr - 2) +#define F16S *(int16_t*)(addr += 2, addr - 2) +#define F32 *(uint32_t*)(addr += 4, addr - 4) +#define F32S *(int32_t*)(addr += 4, addr - 4) +#define F32S64 (uint64_t)(int64_t) F32S +#define F64 *(uint64_t*)(addr += 8, addr - 8) +#define PK(a) *(uint8_t*)(addr + a) +#define PK16(a) *(uint16_t*)(addr + a) +#define PK32(a) *(uint32_t*)(addr + a) +#define PK64(a) *(uint64_t*)(addr + a) +#define PKip(a) *(uint8_t*)(ip + a) + +// Strong mem emulation helpers +#define SMREAD_MIN 2 +#define SMWRITE_MIN 1 +// Sequence of Read will trigger a DMB on "first" read if 
// strongmem is >= SMREAD_MIN
// Sequence of Write will trigger a DMB on "last" write if strongmem is >= 1
// All Write operation that might use a lock all have a memory barrier if strongmem is >= SMWRITE_MIN
// Opcode will read
#define SMREAD() \
    ; \
    if ((dyn->smread == 0) && (box64_dynarec_strongmem > SMREAD_MIN)) { \
        SMDMB(); \
    } else \
        dyn->smread = 1
// Opcode will read with option forced lock
#define SMREADLOCK(lock) \
    if ((lock) || ((dyn->smread == 0) && (box64_dynarec_strongmem > SMREAD_MIN))) { SMDMB(); }
// Opcode might read (depend on nextop)
#define SMMIGHTREAD() \
    if (!MODREG) { SMREAD(); }
// Opcode has wrote
#define SMWRITE() dyn->smwrite = 1
// Opcode has wrote (strongmem>1 only)
#define SMWRITE2() \
    if (box64_dynarec_strongmem > SMREAD_MIN) dyn->smwrite = 1
// Opcode has wrote with option forced lock
#define SMWRITELOCK(lock) \
    if (lock || (box64_dynarec_strongmem > SMWRITE_MIN)) { \
        SMDMB(); \
    } else \
        dyn->smwrite = 1
// Opcode might have wrote (depend on nextop)
#define SMMIGHTWRITE() \
    if (!MODREG) { SMWRITE(); }
// Start of sequence
#define SMSTART() SMEND()
// End of sequence
#define SMEND() \
    if (dyn->smwrite && box64_dynarec_strongmem) { DBAR(0); } \
    dyn->smwrite = 0; \
    dyn->smread = 0;
// Force a Data memory barrier (for LOCK: prefix)
#define SMDMB() \
    DBAR(0); \
    dyn->smwrite = 0; \
    dyn->smread = 1

// LOCK_* define
#define LOCK_LOCK (int*)1

// GETGD get x64 register in gd
// FIX: removed the trailing ';' inside the macro -- it defeated the
// do{...}while(0) idiom ('GETGD;' expanded to an extra empty statement,
// which breaks 'if (x) GETGD; else ...')
#define GETGD \
    do { \
        gd = TO_LA464(((nextop & 0x38) >> 3) + (rex.r << 3)); \
    } while (0)

// CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1)
// R0 will not be pushed/popd if ret is -2
#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0)
// CALL_ will use x7 for the call address.
Return value can be put in ret (unless ret is -1) +// R0 will not be pushed/popd if ret is -2 +#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x7, ret, 1, reg) +// CALL_S will use x7 for the call address. Return value can be put in ret (unless ret is -1) +// R0 will not be pushed/popd if ret is -2. Flags are not save/restored +#define CALL_S(F, ret) call_c(dyn, ninst, F, x7, ret, 0, 0) + +#define MARKi(i) dyn->insts[ninst].mark[i] = dyn->native_size +#define GETMARKi(i) dyn->insts[ninst].mark[i] +#define MARK MARKi(0) +#define GETMARK GETMARKi(0) +#define MARK2 MARKi(1) +#define GETMARK2 GETMARKi(1) +#define MARK3 MARKi(2) +#define GETMARK3 GETMARKi(2) + +#define MARKFi(i) dyn->insts[ninst].markf[i] = dyn->native_size +#define GETMARKFi(i) dyn->insts[ninst].markf[i] +#define MARKF MARKFi(0) +#define GETMARKF GETMARKFi(0) +#define MARKF2 MARKFi(1) +#define GETMARKF2 GETMARKFi(1) + +#define MARKSEG dyn->insts[ninst].markseg = dyn->native_size +#define GETMARKSEG dyn->insts[ninst].markseg +#define MARKLOCK dyn->insts[ninst].marklock = dyn->native_size +#define GETMARKLOCK dyn->insts[ninst].marklock + +#ifndef READFLAGS +#define READFLAGS(A) + +#endif + +#ifndef BARRIER +#define BARRIER(A) +#endif +#ifndef DEFAULT +#define DEFAULT \ + *ok = -1; \ + BARRIER(2) +#endif + +#ifndef TABLE64 +#define TABLE64(A, V) +#endif + +#define ARCH_INIT() + +#if STEP < 2 +#define GETIP(A) +#define GETIP_(A) +#else +// put value in the Table64 even if not using it for now to avoid difference between Step2 and Step3. Needs to be optimized later... 
+#define GETIP(A) \ + if (dyn->last_ip && ((A)-dyn->last_ip) < 2048) { \ + uint64_t _delta_ip = (A)-dyn->last_ip; \ + dyn->last_ip += _delta_ip; \ + if (_delta_ip) { \ + ADDI_D(xRIP, xRIP, _delta_ip); \ + } \ + } else { \ + dyn->last_ip = (A); \ + if (dyn->last_ip < 0xffffffff) { \ + MOV64x(xRIP, dyn->last_ip); \ + } else \ + TABLE64(xRIP, dyn->last_ip); \ + } +#define GETIP_(A) \ + if (dyn->last_ip && ((A)-dyn->last_ip) < 2048) { \ + int64_t _delta_ip = (A)-dyn->last_ip; \ + if (_delta_ip) { ADDI_D(xRIP, xRIP, _delta_ip); } \ + } else { \ + if ((A) < 0xffffffff) { \ + MOV64x(xRIP, (A)); \ + } else \ + TABLE64(xRIP, (A)); \ + } +#endif +#define CLEARIP() dyn->last_ip = 0 + +#define MODREG ((nextop & 0xC0) == 0xC0) + +void la464_epilog(void); +void* la464_next(x64emu_t* emu, uintptr_t addr); + +#ifndef STEPNAME +#define STEPNAME3(N, M) N##M +#define STEPNAME2(N, M) STEPNAME3(N, M) +#define STEPNAME(N) STEPNAME2(N, STEP) +#endif + +#define native_pass STEPNAME(native_pass) + +#define dynarec64_00 STEPNAME(dynarec64_00) + +#define geted STEPNAME(geted) +#define geted32 STEPNAME(geted32) +#define jump_to_epilog STEPNAME(jump_to_epilog) +#define jump_to_next STEPNAME(jump_to_next) +#define call_c STEPNAME(call_c) + +#define fpu_reset_cache STEPNAME(fpu_reset_cache) +#define fpu_propagate_stack STEPNAME(fpu_propagate_stack) +#define fpu_purgecache STEPNAME(fpu_purgecache) +#define fpu_reflectcache STEPNAME(fpu_reflectcache) +#define fpu_unreflectcache STEPNAME(fpu_unreflectcache) + +/* setup r2 to address pointed by */ +uintptr_t geted(dynarec_la464_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta); + +/* setup r2 to address pointed by */ +uintptr_t geted32(dynarec_la464_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta); + +// generic x64 helper +void 
jump_to_epilog(dynarec_la464_t* dyn, uintptr_t ip, int reg, int ninst);
void jump_to_next(dynarec_la464_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits);
void call_c(dynarec_la464_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);

// reset the cache with n
void fpu_reset_cache(dynarec_la464_t* dyn, int ninst, int reset_n);
// propagate stack state
void fpu_propagate_stack(dynarec_la464_t* dyn, int ninst);
// purge the FPU cache (needs 3 scratch registers)
void fpu_purgecache(dynarec_la464_t* dyn, int ninst, int next, int s1, int s2, int s3);
void fpu_reflectcache(dynarec_la464_t* dyn, int ninst, int s1, int s2, int s3);
void fpu_unreflectcache(dynarec_la464_t* dyn, int ninst, int s1, int s2, int s3);

// entry point of the no-prefix (00) opcode map, stamped out once per pass via STEPNAME
uintptr_t dynarec64_00(dynarec_la464_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);

// PASS3(A): only expand A during the final, code-emitting pass
#if STEP < 3
#define PASS3(A)
#else
#define PASS3(A) A
#endif

// MAYUSE(A): silence "unused variable" warnings on the non-emitting passes
#if STEP < 3
#define MAYUSE(A) (void)A
#else
#define MAYUSE(A)
#endif

// Clear the emu self-test markers when BOX64_DYNAREC_TEST is active
#define NOTEST(s1) \
    if (box64_dynarec_test) { \
        ST_W(xZR, xEmu, offsetof(x64emu_t, test.test)); \
        ST_W(xZR, xEmu, offsetof(x64emu_t, test.clean)); \
    }

// Arm the emu self-test marker (s2 used as scratch)
#define GOTEST(s1, s2) \
    if (box64_dynarec_test) { \
        MOV32w(s2, 1); \
        ST_W(s2, xEmu, offsetof(x64emu_t, test.test)); \
    }

#endif //__DYNAREC_LA464_HELPER_H__ \ No newline at end of file diff --git a/src/dynarec/la464/dynarec_la464_jmpnext.c b/src/dynarec/la464/dynarec_la464_jmpnext.c new file mode 100644 index 00000000..5a798e02 --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_jmpnext.c @@ -0,0 +1,13 @@
#include <stdint.h>

#include "la464_emitter.h"

// NOTE(review): multi-statement macro without do{...}while(0); safe here only
// because the emitter macros use it as a full statement -- confirm before reuse
#define EMIT(A) *block = (A); ++block;
// Overwrite 'addr' with a 3-instruction trampoline that loads a code pointer
// stored at 'next' (pc-relative pcaddu12i + ld.d) and jumps to it.
// NOTE(review): assumes the addr->next offset fits the range covered by
// SPLIT20/SPLIT12 -- TODO confirm the allocator keeps them close enough
void CreateJmpNext(void* addr, void* next)
{
    uint32_t* block = (uint32_t*)addr;
    uintptr_t diff = (intptr_t)next - (intptr_t)addr;
    PCADDU12I(x2, SPLIT20(diff));
    LD_D(x2, x2, SPLIT12(diff));
    BR(x2);
} \ No newline at end of file diff --git
a/src/dynarec/la464/dynarec_la464_pass0.h b/src/dynarec/la464/dynarec_la464_pass0.h new file mode 100644 index 00000000..3f6a68ce --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_pass0.h @@ -0,0 +1,47 @@ +#define INIT uintptr_t sav_addr = addr +#define FINI \ + dyn->isize = addr - sav_addr; \ + dyn->insts[ninst].x64.addr = addr; \ + if (ninst) dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr + +#define MESSAGE(A, ...) +#define READFLAGS(A) \ + dyn->insts[ninst].x64.use_flags = A; \ + dyn->f.dfnone = 1; \ + dyn->f.pending = SF_SET +#define EMIT(A) dyn->native_size += 4 +#define BARRIER(A) \ + if (A != BARRIER_MAYBE) { \ + fpu_purgecache(dyn, ninst, 0, x1, x2, x3); \ + dyn->insts[ninst].x64.barrier = A; \ + } else \ + dyn->insts[ninst].barrier_maybe = 1 +#define NEW_INST \ + ++dyn->size; \ + if (dyn->size + 3 >= dyn->cap) { \ + dyn->insts = (instruction_native_t*)dynaRealloc(dyn->insts, sizeof(instruction_native_t) * dyn->cap * 2); \ + memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_native_t) * dyn->cap); \ + dyn->cap *= 2; \ + } \ + dyn->insts[ninst].x64.addr = ip; \ + dyn->insts[ninst].f_entry = dyn->f; \ + if (ninst) { dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr; } +#define INST_EPILOG \ + dyn->insts[ninst].f_exit = dyn->f; \ + dyn->insts[ninst].x64.has_next = (ok > 0) ? 1 : 0; +#define INST_NAME(name) +#define DEFAULT \ + --dyn->size; \ + *ok = -1; \ + if (box64_dynarec_log >= LOG_INFO || box64_dynarec_dump || box64_dynarec_missing) { \ + dynarec_log(LOG_NONE, "%p: Dynarec stopped because of %sOpcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \ + (void*)ip, rex.is32bits ? 
"32bits " : "", \ + PKip(0), \ + PKip(1), PKip(2), PKip(3), \ + PKip(4), PKip(5), PKip(6), \ + PKip(7), PKip(8), PKip(9), \ + PKip(10), PKip(11), PKip(12), \ + PKip(13), PKip(14)); \ + printFunctionAddr(ip, " => "); \ + dynarec_log(LOG_NONE, "\n"); \ + } diff --git a/src/dynarec/la464/dynarec_la464_pass1.h b/src/dynarec/la464/dynarec_la464_pass1.h new file mode 100644 index 00000000..7d616b22 --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_pass1.h @@ -0,0 +1,11 @@ +#define INIT +#define FINI +#define MESSAGE(A, ...) +#define EMIT(A) +#define NEW_INST \ + dyn->insts[ninst].f_entry = dyn->f; + +#define INST_EPILOG \ + dyn->insts[ninst].f_exit = dyn->f + +#define INST_NAME(name) \ No newline at end of file diff --git a/src/dynarec/la464/dynarec_la464_pass2.h b/src/dynarec/la464/dynarec_la464_pass2.h new file mode 100644 index 00000000..df7d7809 --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_pass2.h @@ -0,0 +1,26 @@ +#define INIT dyn->native_size = 0 +#define FINI \ + if (ninst) { \ + dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size); \ + dyn->insts_size += 1 + ((dyn->insts[ninst].x64.size > (dyn->insts[ninst].size / 4)) ? dyn->insts[ninst].x64.size : (dyn->insts[ninst].size / 4)) / 15; \ + } + +#define MESSAGE(A, ...) +#define EMIT(A) \ + do { \ + dyn->insts[ninst].size += 4; \ + dyn->native_size += 4; \ + } while (0) +#define NEW_INST \ + if (ninst) { \ + dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size); \ + dyn->insts_size += 1 + ((dyn->insts[ninst - 1].x64.size > (dyn->insts[ninst - 1].size / 4)) ? 
dyn->insts[ninst - 1].x64.size : (dyn->insts[ninst - 1].size / 4)) / 15; \ + } +#define INST_EPILOG dyn->insts[ninst].epilog = dyn->native_size; +#define INST_NAME(name) +#define TABLE64(A, V) \ + { \ + Table64(dyn, (V), 2); \ + EMIT(0); \ + EMIT(0); \ + } diff --git a/src/dynarec/la464/dynarec_la464_pass3.h b/src/dynarec/la464/dynarec_la464_pass3.h new file mode 100644 index 00000000..ecaf8a77 --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_pass3.h @@ -0,0 +1,29 @@ +#define INIT +#define FINI \ + if (ninst) \ + addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst].x64.size, dyn->insts[ninst].size / 4); \ + addInst(dyn->instsize, &dyn->insts_size, 0, 0); +#define EMIT(A) \ + do { \ + if (box64_dynarec_dump) print_opcode(dyn, ninst, (uint32_t)(A)); \ + if ((uintptr_t)dyn->block < dyn->tablestart) \ + *(uint32_t*)(dyn->block) = (uint32_t)(A); \ + dyn->block += 4; \ + dyn->native_size += 4; \ + dyn->insts[ninst].size2 += 4; \ + } while (0) + +#define MESSAGE(A, ...) \ + if (box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__) +#define NEW_INST \ + if (ninst) \ + addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst - 1].x64.size, dyn->insts[ninst - 1].size / 4); +#define INST_EPILOG +#define INST_NAME(name) inst_name_pass3(dyn, ninst, name, rex) +#define TABLE64(A, V) \ + { \ + int val64offset = Table64(dyn, (V), 3); \ + MESSAGE(LOG_DUMP, " Table64: 0x%lx\n", (V)); \ + PCADDU12I(A, SPLIT20(val64offset)); \ + LD_D(A, A, SPLIT12(val64offset)); \ + } diff --git a/src/dynarec/la464/dynarec_la464_private.h b/src/dynarec/la464/dynarec_la464_private.h new file mode 100644 index 00000000..89b248b0 --- /dev/null +++ b/src/dynarec/la464/dynarec_la464_private.h @@ -0,0 +1,88 @@ +#ifndef __DYNAREC_LA464_PRIVATE_H_ +#define __DYNAREC_LA464_PRIVATE_H_ + +#include "../dynarec_private.h" + +typedef struct x64emu_s x64emu_t; +typedef struct dynablock_s dynablock_t; +typedef struct instsize_s instsize_t; + +#define BARRIER_MAYBE 8 + +typedef struct flagcache_s { + int 
pending; // is there a pending flags here, or to check? + int dfnone; // if deferred flags is already set to df_none +} flagcache_t; + +typedef struct instruction_la464_s { + instruction_x64_t x64; + uintptr_t address; // (start) address of the arm emitted instruction + uintptr_t epilog; // epilog of current instruction (can be start of next, or barrier stuff) + int size; // size of the arm emitted instruction + int size2; // size of the arm emitted instrucion after pass2 + int pred_sz; // size of predecessor list + int *pred; // predecessor array + uintptr_t mark[3]; + uintptr_t markf[2]; + uintptr_t markseg; + uintptr_t marklock; + int pass2choice;// value for choices that are fixed on pass2 for pass3 + uintptr_t natcall; + int retn; + uint8_t barrier_maybe; + uint8_t will_write; + uint8_t last_write; + flagcache_t f_exit; // flags status at end of instruction + flagcache_t f_entry; // flags status before the instruction begin +} instruction_la464_t; + +typedef struct dynarec_la464_s { + instruction_la464_t* insts; + int32_t size; + int32_t cap; + uintptr_t start; // start of the block + uint32_t isize; // size in bytes of x64 instructions included + void* block; // memory pointer where next instruction is emitted + uintptr_t native_start; // start of the arm code + size_t native_size; // size of emitted arm code + uintptr_t last_ip; // last set IP in RIP (or NULL if unclean state) TODO: move to a cache something + uint64_t* table64; // table of 64bits values + int table64size;// size of table (will be appended at end of executable code) + int table64cap; + uintptr_t tablestart; + uintptr_t jmp_next; // address of the jump_next address + flagcache_t f; + uintptr_t* next; // variable array of "next" jump address + int next_sz; + int next_cap; + int* jmps; // variable array of jump instructions + int jmp_sz; + int jmp_cap; + int* predecessor;// single array of all predecessor + dynablock_t* dynablock; + instsize_t* instsize; + size_t insts_size; // size of the 
instruction size array (calculated) + uintptr_t forward; // address of the last end of code while testing forward + uintptr_t forward_to; // address of the next jump to (to check if everything is ok) + int32_t forward_size; // size at the forward point + int forward_ninst; // ninst at the forward point + uint8_t smread; // for strongmem model emulation + uint8_t smwrite; // for strongmem model emulation + uint8_t always_test; + uint8_t abort; +} dynarec_la464_t; + +void add_next(dynarec_la464_t *dyn, uintptr_t addr); +uintptr_t get_closest_next(dynarec_la464_t *dyn, uintptr_t addr); +void add_jump(dynarec_la464_t *dyn, int ninst); +int get_first_jump(dynarec_la464_t *dyn, int next); +int is_nops(dynarec_la464_t *dyn, uintptr_t addr, int n); +int is_instructions(dynarec_la464_t *dyn, uintptr_t addr, int n); + +int Table64(dynarec_la464_t *dyn, uint64_t val, int pass); // add a value to table64 (if needed) and gives back the imm19 to use in LDR_literal + +void CreateJmpNext(void* addr, void* next); + +#define GO_TRACE(A, B, s0) \ + +#endif //__DYNAREC_ARM_PRIVATE_H_ diff --git a/src/dynarec/la464/la464_emitter.h b/src/dynarec/la464/la464_emitter.h new file mode 100644 index 00000000..43cd81c1 --- /dev/null +++ b/src/dynarec/la464/la464_emitter.h @@ -0,0 +1,369 @@ +#ifndef __LA464_EMITTER_H__ +#define __LA464_EMITTER_H__ +/* + LA464 Emitter +*/ + +// LA464 ABI +/* +Name Alias Meaning saver +--------------------------------------------------------- +r0 zero Zero register - +r1 ra Return address Callee +r2 tp Thread pointer - +r3 sp Stack pointer Callee +r4-r5 a0-a1 Function arguments,Return val. Caller +r6-r11 a2-a7 Function arguments Caller +r12-r20 t0-t8 Temp registers Caller +r21 Reserved Non-allocatable - +r22 fp/s9 Frame pointer/Static register Callee +r23-31 s0-s8 Static registers Callee +--------------------------------------------------------- +f0-f1 fa0-fa1 Function arguments,Return val. 
Caller +f2-f7 fa2-fa7 Function arguments Caller +f8-f23 ft0-ft15 Temp registers Caller +f24-f31 fs0-fs7 Static registers Callee +*/ +/* + LA464 GPR mapping + There is no 15 registers free, so split the regs in 2 part + AX..DI : r12-r19 + R8..R15: r23-r30 + flags in r31 + ip in r20 +*/ +// x86 Register mapping +#define xRAX 12 +#define xRCX 13 +#define xRDX 14 +#define xRBX 15 +#define xRSP 16 +#define xRBP 17 +#define xRSI 18 +#define xRDI 19 +#define xR8 23 +#define xR9 24 +#define xR10 25 +#define xR11 26 +#define xR12 27 +#define xR13 28 +#define xR14 29 +#define xR15 30 +#define xFlags 31 +#define xRIP 20 +// function to move from x86 regs number to LA464 reg number +#define TO_LA464(A) ((A)>7)?((A)+15):((A)+12) +// function to move from LA464 regs number to x86 reg number +#define FROM_LA464(A) ((A)>22)?((A)-15):((A)-12) +// 32bits version +#define wEAX xRAX +#define wECX xRCX +#define wEDX xRDX +#define wEBX xRBX +#define wESP xRSP +#define wEBP xRBP +#define wESI xRSI +#define wEDI xRDI +#define wR8 xR8 +#define wR9 xR9 +#define wR10 xR10 +#define wR11 xR11 +#define wR12 xR12 +#define wR13 xR13 +#define wR14 xR14 +#define wR15 xR15 +#define wFlags xFlags +// scratch registers +#define x1 5 +#define x2 6 +#define x3 7 +#define x4 8 +#define x5 9 +#define x6 10 +// 32bits version of scratch +#define w1 x1 +#define w2 x2 +#define w3 x3 +#define w4 x4 +#define w5 x5 +#define w6 x6 +// emu is r0 +#define xEmu 4 +// LA464 RA +#define xRA 1 +#define ra xRA +// LA464 SP +#define xSP 3 +// xZR regs +#define xZR 0 +#define wZR xZR +#define r0 xZR + +// split a 32bits value in 20bits + 12bits, adjust the upper part is 12bits is negative +#define SPLIT20(A) (((A) + 0x800) >> 12) +#define SPLIT12(A) ((A) & 0xfff) + +// ZERO the upper part +#define ZEROUP(r) \ + do { \ + MOV32w(x2, 0xffffffff); \ + AND(r, r, x2); \ + } while (0); + +#define type_4R(opc, ra, rk, rj, rd) ((opc) << 20 | (ra) << 15 | (rk) << 10 | (rj) << 5 | (rd)) +#define type_3R(opc, rk, rj, rd) ((opc) << 
15 | (rk) << 10 | (rj) << 5 | (rd)) +#define type_3RI2(opc, imm2, rk, rj, rd) ((opc) << 17 | ((imm2) & 0x3) << 15 | (rk) << 10 | (rj) << 5 | (rd)) +#define type_2R(opc, rj, rd) ((opc) << 10 | (rj) << 5 | (rd)) +#define type_2RI5(opc, imm5, rj, rd) ((opc) << 15 | ((imm5) & 0x1F) << 10 | (rj) << 5 | (rd)) +#define type_2RI6(opc, imm6, rj, rd) ((opc) << 16 | ((imm6) & 0x3F) << 10 | (rj) << 5 | (rd)) +#define type_2RI8(opc, imm8, rj, rd) ((opc) << 18 | ((imm8) & 0xFF) << 10 | (rj) << 5 | (rd)) +#define type_2RI12(opc, imm12, rj, rd) ((opc) << 22 | ((imm12) & 0xFFF) << 10 | (rj) << 5 | (rd)) +#define type_2RI14(opc, imm14, rj, rd) ((opc) << 24 | ((imm14) & 0x3FFF) << 10 | (rj) << 5 | (rd)) +#define type_2RI16(opc, imm16, rj, rd) ((opc) << 26 | ((imm16) & 0xFFFF) << 10 | (rj) << 5 | (rd)) +#define type_1RI20(opc, imm20, rd) ((opc) << 25 | ((imm20) & 0xFFFFF) << 5 | (rd)) +#define type_1RI21(opc, imm21, rj) ((opc) << 26 | ((imm21) & 0xFFFF) << 10 | (rj) << 5 | ((imm21) & 0x1F0000) >> 16) +#define type_hint(opc, imm15) ((opc) << 15 | ((imm15) & 0x7FFF)) +#define type_I26(opc, imm26) ((opc) << 26 | ((imm26) & 0xFFFF) << 10 | ((imm26) & 0x3FF0000)) + +// tmp = GR[rj][31:0] + GR[rk][31:0] +// Gr[rd] = SignExtend(tmp[31:0], GRLEN) +#define ADD_W(rd, rj, rk) EMIT(type_3R(0b00000000000100000, rk, rj, rd)) +// tmp = GR[rj][31:0] - GR[rk][31:0] +// Gr[rd] = SignExtend(tmp[31:0], GRLEN) +#define SUB_W(rd, rj, rk) EMIT(type_3R(0b00000000000100010, rk, rj, rd)) +// tmp = GR[rj][63:0] + GR[rk][63:0] +// Gr[rd] = tmp[63:0] +#define ADD_D(rd, rj, rk) EMIT(type_3R(0b00000000000100001, rk, rj, rd)) +// tmp = GR[rj][63:0] - GR[rk][63:0] +// Gr[rd] = tmp[63:0] +#define SUB_D(rd, rj, rk) EMIT(type_3R(0b00000000000100011, rk, rj, rd)) + +// tmp = GR[rj][31:0] + SignExtend(imm12, 32) +// GR[rd] = SignExtend(tmp[31:0], GRLEN) +#define ADDI_W(rd, rj, imm12) EMIT(type_2RI12(0b0000001010, imm12, rj, rd)) +// tmp = GR[rj][63:0] + SignExtend(imm12, 64) +// GR[rd] = tmp[63:0] +#define ADDI_D(rd, rj, 
imm12) EMIT(type_2RI12(0b0000001011, imm12, rj, rd)) +// tmp = GR[rj][63:0] + SignExtend({imm16, 16'b0}, 64) +// GR[rd] = tmp[63:0] +#define ADDU16I_D(rd, rj, imm16) EMIT(type_2RI16(0b000100, imm16, rj, rd)) + +// tmp = (GR[rj][31:0] << (imm2 + 1)) + GR[rk][31:0] +// GR[rd] = SignExtend(tmp[31:0], GRLEN) +#define ALSL_W(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000000010, imm2, rk, rj, rd)) +// tmp = (GR[rj][31:0] << (imm2 + 1)) + GR[rk][31:0] +// GR[rd] = ZeroExtend(tmp[31:0], GRLEN) +#define ALSL_WU(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000000011, imm2, rk, rj, rd)) +// tmp = (GR[rj][63:0] << (imm2 + 1)) + GR[rk][63:0] +// GR[rd] = tmp[63:0] +#define ALSL_D(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000010110, imm2, rk, rj, rd)) + +// GR[rd] = SignExtend({imm20, 12'b0}, GRLEN) +#define LU12I_W(rd, imm20) EMIT(type_1RI20(0b0001010, imm20, rd)) +// GR[rd] = {SignExtend(imm20, 32), GR[rd][31:0]} +#define LU32I_D(rd, imm20) EMIT(type_1RI20(0b0001011, imm20, rd)) +// GR[rd] = {imm12, GR[rj][51:0]} +#define LU52I_D(rd, rj, imm12) EMIT(type_2RI12(0b0000001100, imm12, rj, rd)) + +// GR[rd] = PC + SignExtend({imm20, 2'b0}, GRLEN) +#define PCADDI(rd, imm20) EMIT(type_1RI20(0b0001100, imm20, rd)) +// GR[rd] = PC + SignExtend({imm20, 12'b0}, GRLEN) +#define PCADDU12I(rd, imm20) EMIT(type_1RI20(0b0001110, imm20, rd)) +// GR[rd] = PC + SignExtend({imm20, 18'b0}, GRLEN) +#define PCADDU18I(rd, imm20) EMIT(type_1RI20(0b0001111, imm20, rd)) +// tmp = PC + SignExtend({imm20, 12'b0}, GRLEN) +// GR[rd] = {tmp[GRLEN-1:12], 12'b0} +#define PCALAU12I(rd, imm20) EMIT(type_1RI20(0b0001101, imm20, rd)) + +// GR[rd] = GR[rj] & GR[rk] +#define AND(rd, rj, rk) EMIT(type_3R(0b00000000000101001, rk, rj, rd)) +// GR[rd] = GR[rj] | GR[rk] +#define OR(rd, rj, rk) EMIT(type_3R(0b00000000000101010, rk, rj, rd)) +// GR[rd] = ~(GR[rj] | GR[rk]) +#define NOR(rd, rj, rk) EMIT(type_3R(0b00000000000101000, rk, rj, rd)) +// GR[rd] = GR[rj] ^ GR[rk] +#define XOR(rd, rj, rk) EMIT(type_3R(0b00000000000101011, 
rk, rj, rd)) +// GR[rd] = GR[rj] & (~GR[rk]) +#define ANDN(rd, rj, rk) EMIT(type_3R(0b00000000000101101, rk, rj, rd)) +// GR[rd] = GR[rj] | (~GR[rk]) +#define ORN(rd, rj, rk) EMIT(type_3R(0b00000000000101100, rk, rj, rd)) + +// GR[rd] = GR[rj] & ZeroExtend(imm12, GRLEN) +#define ANDI(rd, rj, imm12) EMIT(type_2RI12(0b0000001101, imm12, rj, rd)) +// GR[rd] = GR[rj] | ZeroExtend(imm12, GRLEN) +#define ORI(rd, rj, imm12) EMIT(type_2RI12(0b0000001110, imm12, rj, rd)) +// GR[rd] = GR[rj] ^ ZeroExtend(imm12, GRLEN) +#define XORI(rd, rj, imm12) EMIT(type_2RI12(0b0000001111, imm12, rj, rd)) + +// GR[rd] = SLL(GR[rj][63:0], imm6) (Shift Left Logical) +#define SLLI_D(rd, rj, imm6) EMIT(type_2RI6(0b0000000001000001, imm6, rj, rd)) +// GR[rd] = SRL(GR[rj][63:0], imm6) (Shift Right Logical) +#define SRLI_D(rd, rj, imm6) EMIT(type_2RI6(0b0000000001000101, imm6, rj, rd)) +// GR[rd] = SRA(GR[rj][63:0], imm6) (Shift Right Arithmetic) +#define SRAI_D(rd, rj, imm6) EMIT(type_2RI6(0b0000000001001001, imm6, rj, rd)) +// GR[rd] = ROTR(GR[rj][63:0], imm6) (Rotate To Right) +#define ROTRI_D(rd, rj, imm6) EMIT(type_2RI6(0b0000000001001101, imm6, rj, rd)) + +// rd = rj + (rk << imm6) +#define ADDSL(rd, rs1, rs2, imm6, scratch) \ + if (!(imm6)) { \ + ADD_D(rd, rs1, rs2); \ + } else { \ + SLLI_D(scratch, rs2, imm6); \ + ADD_D(rd, rs1, scratch); \ + } + +// if GR[rj] == 0: +// PC = PC + SignExtend({imm21, 2'b0}, GRLEN) +#define BEQZ(rj, imm21) EMIT(type_1RI21(0b010000, (imm21) >> 2, rj)) +// if GR[rj] != 0: +// PC = PC + SignExtend({imm21, 2'b0}, GRLEN) +#define BNEZ(rj, imm21) EMIT(type_1RI21(0b010001, (imm21) >> 2, rj)) + +// GR[rd] = PC + 4 +// PC = GR[rj] + SignExtend({imm16, 2'b0}, GRLEN) +#define JIRL(rd, rj, imm16) EMIT(type_2RI16(0b010011, imm16, rj, rd)) +// PC = GR[rj] +#define BR(rj) JIRL(xZR, rj, 0x0) + +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = AddressTranslation(vaddr) +// byte = MemoryLoad(paddr, BYTE) +// GR[rd] = SignExtend(byte, 
GRLEN) +#define LD_B(rd, rj, imm12) EMIT(type_2RI12(0b0010100000, imm12, rj, rd)) +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = AddressTranslation(vaddr) +// haldword = MemoryLoad(paddr, HALFWORD) +// GR[rd] = SignExtend(halfword, GRLEN) +#define LD_H(rd, rj, imm12) EMIT(type_2RI12(0b0010100001, imm12, rj, rd)) +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = AddressTranslation(vaddr) +// word = MemoryLoad(paddr, WORD) +// GR[rd] = SignExtend(word, GRLEN) +#define LD_W(rd, rj, imm12) EMIT(type_2RI12(0b0010100010, imm12, rj, rd)) +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = AddressTranslation(vaddr) +// GR[rd] = MemoryLoad(paddr, DOUBLEWORD) +#define LD_D(rd, rj, imm12) EMIT(type_2RI12(0b0010100011, imm12, rj, rd)) +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = AddressTranslation(vaddr) +// byte = MemoryLoad(paddr, BYTE) +// GR[rd] = ZeroExtend(byte, GRLEN) +#define LD_BU(rd, rj, imm12) EMIT(type_2RI12(0b0010101000, imm12, rj, rd)) +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = AddressTranslation(vaddr) +// halfword = MemoryLoad(paddr, HALFWORD) +// GR[rd] = ZeroExtend(halfword, GRLEN) +#define LD_HU(rd, rj, imm12) EMIT(type_2RI12(0b0010101001, imm12, rj, rd)) +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = AddressTranslation(vaddr) +// word = MemoryLoad(paddr, WORD) +// GR[rd] = ZeroExtend(word, GRLEN) +#define LD_WU(rd, rj, imm12) EMIT(type_2RI12(0b0010101010, imm12, rj, rd)) + +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = AddressTranslation(vaddr) +// MemoryStore(GR[rd][7:0], paddr, BYTE) +#define ST_B(rd, rj, imm12) EMIT(type_2RI12(0b0010100100, imm12, rj, rd)) +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = 
AddressTranslation(vaddr) +// MemoryStore(GR[rd][15:0], paddr, HALFWORD) +#define ST_H(rd, rj, imm12) EMIT(type_2RI12(0b0010100101, imm12, rj, rd)) +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = AddressTranslation(vaddr) +// MemoryStore(GR[rd][31:0], paddr, WORD) +#define ST_W(rd, rj, imm12) EMIT(type_2RI12(0b0010100110, imm12, rj, rd)) +// vaddr = GR[rj] + SignExtend(imm12, GRLEN) +// AddressComplianceCheck(vaddr) +// paddr = AddressTranslation(vaddr) +// MemoryStore(GR[rd][63:0], paddr, DOUBLEWORD) +#define ST_D(rd, rj, imm12) EMIT(type_2RI12(0b0010100111, imm12, rj, rd)) + +// GR[rd] = imm32 +#define MOV32w(rd, imm32) \ + if (((uint32_t)(imm32)) > 0xfffu) { \ + LU12I_W(rd, (imm32) >> 12); \ + ORI(rd, rd, imm32); \ + } else { \ + ORI(rd, xZR, imm32); \ + } +// GR[rd] = imm64 +#define MOV64x(rd, imm64) \ + MOV32w(rd, imm64); \ + if (((uint64_t)(imm64)) > 0xffffffffu) { \ + LU32I_D(rd, ((uint64_t)(imm64)) >> 32); \ + LU52I_D(rd, rd, ((uint64_t)(imm64)) >> 52); \ + } + +// rd[63:0] = rj[63:0] (pseudo instruction) +#define MV(rd, rj) ADDI_D(rd, rj, 0) +// rd = rj (pseudo instruction) +#define MVxw(rd, rj) \ + if (rex.w) { \ + MV(rd, rj); \ + } else { \ + MOV32w(x2, 0xffffffff); \ + AND(rd, rj, x2); \ + } +// rd = rj (pseudo instruction) +#define MVz(rd, rj) \ + if (rex.is32bits) { \ + MOV32w(x2, 0xffffffff); \ + AND(rd, rj, x2); \ + } else { \ + MV(rd, rj); \ + } + +// PUSH / POP reg[0:63] +#define PUSH1(reg) \ + do { \ + ST_D(reg, xRSP, -8); \ + ADDI_D(xRSP, xRSP, -8); \ + } while (0); +#define POP1(reg) \ + do { \ + LD_D(reg, xRSP, 0); \ + if (reg != xRSP) ADDI_D(xRSP, xRSP, 8); \ + } while (0); + +// PUSH / POP reg[0:31] +#define PUSH1_32(reg) \ + do { \ + ST_W(reg, xRSP, -4); \ + ADDI_W(xRSP, xRSP, -4); \ + } while (0); +#define POP1_32(reg) \ + do { \ + LD_WU(reg, xRSP, 0); \ + if (reg != xRSP) ADDI_W(xRSP, xRSP, 4); \ + } while (0); + +// POP reg +#define POP1z(reg) \ + if (rex.is32bits) { \ + POP1_32(reg); \ + } 
else { \ + POP1(reg); \ + } +// PUSH reg +#define PUSH1z(reg) \ + if (rex.is32bits) { \ + PUSH1_32(reg); \ + } else { \ + PUSH1(reg); \ + } + +// DBAR hint +#define DBAR(hint) EMIT(type_hint(0b00111000011100100, hint)) + +#endif //__ARM64_EMITTER_H__ \ No newline at end of file diff --git a/src/dynarec/la464/la464_epilog.S b/src/dynarec/la464/la464_epilog.S new file mode 100644 index 00000000..41eae0cc --- /dev/null +++ b/src/dynarec/la464/la464_epilog.S @@ -0,0 +1,55 @@ +//la464 epilog for dynarec +//Save stuff, prepare stack and register +//called with pointer to emu as 1st parameter +//and address to jump to as 2nd parameter + +.text +.align 4 + +.global la464_epilog +la464_epilog: + //update register -> emu + st.d $r12, $r4, (8 * 0) + st.d $r13, $r4, (8 * 1) + st.d $r14, $r4, (8 * 2) + st.d $r15, $r4, (8 * 3) + st.d $r16, $r4, (8 * 4) + st.d $r17, $r4, (8 * 5) + st.d $r18, $r4, (8 * 6) + st.d $r19, $r4, (8 * 7) + st.d $r23, $r4, (8 * 8) + st.d $r24, $r4, (8 * 9) + st.d $r25, $r4, (8 * 10) + st.d $r26, $r4, (8 * 11) + st.d $r27, $r4, (8 * 12) + st.d $r28, $r4, (8 * 13) + st.d $r29, $r4, (8 * 14) + st.d $r30, $r4, (8 * 15) + st.d $r31, $r4, (8 * 16) // xFlags + st.d $r20, $r4, (8 * 17) // put back reg value in emu, including EIP (so x27 must be EIP now) + ld.d $sp, $r4, 552 // restore saved sp from emu->xSPSave, see la464_prolog + ld.d $r11, $sp, -8 + st.d $r11, $r4, 552 + // vpop {d8-d15} + ld.d $r1, $sp, (8 * 0) // load ra + ld.d $r22, $sp, (8 * 1) // load fp + ld.d $r23, $sp, (8 * 2) + ld.d $r24, $sp, (8 * 3) + ld.d $r25, $sp, (8 * 4) + ld.d $r26, $sp, (8 * 5) + ld.d $r27, $sp, (8 * 6) + ld.d $r28, $sp, (8 * 7) + ld.d $r29, $sp, (8 * 8) + ld.d $r30, $sp, (8 * 9) + ld.d $r31, $sp, (8 * 10) + fld.d $f24, $sp, (8 * 11) + fld.d $f25, $sp, (8 * 12) + fld.d $f26, $sp, (8 * 13) + fld.d $f27, $sp, (8 * 14) + fld.d $f28, $sp, (8 * 15) + fld.d $f29, $sp, (8 * 16) + fld.d $f30, $sp, (8 * 17) + fld.d $f31, $sp, (8 * 18) + addi.d $sp, $sp, (8 * 19) + // end, return + ret 
diff --git a/src/dynarec/la464/la464_lock.S b/src/dynarec/la464/la464_lock.S new file mode 100644 index 00000000..9a728b14 --- /dev/null +++ b/src/dynarec/la464/la464_lock.S @@ -0,0 +1,186 @@ +// RV64 lock helper +// there is 2 part: read and write +// write return 0 on success, 1 on fail (value has been changed) + +.text +.align 4 + +.global la464_lock_xchg_dd +.global la464_lock_xchg_d +.global la464_lock_storeifnull +.global la464_lock_storeifnull_d +.global la464_lock_storeifref +.global la464_lock_storeifref_d +.global la464_lock_storeifref2_d +.global la464_lock_decifnot0b +.global la464_lock_storeb +.global la464_lock_incif0 +.global la464_lock_decifnot0 +.global la464_lock_store +.global la464_lock_store_dd +.global la464_lock_get_b +.global la464_lock_get_d +.global la464_lock_get_dd +.global la464_lock_cas_d +.global la464_lock_cas_dd +.global la464_lock_cas_dq + +la464_lock_xchg_dd: + // address is a0, value is a1, return old value in a0 + amswap_db.d $a2, $a1, $a0 + move $a0, $a2 + ret + +la464_lock_xchg_d: + // address is a0, value is a1, return old value in a0 + amswap_db.w $a2, $a1, $a0 + move $a0, $a2 + ret + +la464_lock_storeifnull: + // address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value + dbar 0 + move $a3, $a1 + ll.d $a2, $a0, 0 + bnez $a2, 12 + sc.d $a3, $a0, 0 + beqz $a3, -16 + move $a0, $a2 + ret + +la464_lock_storeifnull_d: + // address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value + dbar 0 + move $a3, $a1 + ll.w $a2, $a0, 0 + bnez $a2, 12 + sc.w $a3, $a0, 0 + beqz $a3, -16 + move $a0, $a2 + ret + +la464_lock_storeifref: + // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value) + dbar 0 + move $a4, $a1 + ll.d $a3, $a0, 0 + bne $a2, $a3, 24 + sc.d $a4, $a0, 0 + beqz $a4, -16 + dbar 0 + move $a0, $a1 + ret + dbar 0 + move $a0, $a3 + ret + +la464_lock_storeifref_d: + // address is a0, value is a1, a1 store to a0 only if [a0] is a2. 
return new [a0] value (so a1 or old value) + dbar 0 + move $a4, $a1 + ll.w $a3, $a0, 0 + bne $a2, $a3, 24 + sc.w $a4, $a0, 0 + beqz $a4, -16 + dbar 0 + move $a0, $a1 + ret + dbar 0 + move $a0, $a3 + ret + +la464_lock_storeifref2_d: + // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return old [a0] value + dbar 0 + move $a4, $a1 + ll.w $a3, $a0, 0 + bne $a2, $a3, 12 + sc.w $a4, $a0, 0 + beqz $a4, -16 + move $a0, $a3 + ret + +la464_lock_decifnot0b: + dbar 0 + // TODO + ret + +la464_lock_storeb: + st.b $a1, $a0, 0 + dbar 0 + ret + +la464_lock_decifnot0: + dbar 0 + ll.w $a1, $a0, 0 + beqz $a1, 20 + addi.d $a1, $a1, -1 + move $a2, $a1 + sc.w $a2, $a0, 0 + beqz $a2, -20 + move $a0, $a1 + ret + +la464_lock_incif0: + dbar 0 + ll.w $a1, $a0, 0 + bnez $a1, 20 + addi.d $a1, $a1, 1 + move $a2, $a1 + sc.w $a2, $a0, 0 + beqz $a2, -20 + move $a0, $a1 + ret + +la464_lock_store: + st.w $a1, $a0, 0 + dbar 0 + ret + +la464_lock_store_dd: + st.d $a1, $a0, 0 + dbar 0 + ret + +la464_lock_get_b: + dbar 0 + ld.b $a0, $a0, 0 + ret + +la464_lock_get_d: + dbar 0 + ld.w $a0, $a0, 0 + ret + +la464_lock_get_dd: + dbar 0 + ld.d $a0, $a0, 0 + ret + +la464_lock_cas_d: + ll.w $a3, $a0, 0 + bne $a3, $a1, 16 + sc.w $a2, $a0, 0 + xori $a0, $a2, 1 + ret + li.d $a0, 1 + ret + +la464_lock_cas_dd: + ll.d $a3, $a0, 0 + bne $a3, $a1, 16 + sc.d $a2, $a0, 0 + xori $a0, $a2, 1 + ret + li.d $a0, 1 + ret + +la464_lock_cas_dq: + ll.d $a4, $a0, 0 + bne $a4, $a2, 20 + sc.d $a1, $a0, 0 + st.d $a3, $a0, 0 + xori $a0, $a1, 1 + ret + li.d $a0, 1 + ret diff --git a/src/dynarec/la464/la464_lock.h b/src/dynarec/la464/la464_lock.h new file mode 100644 index 00000000..8cff10d6 --- /dev/null +++ b/src/dynarec/la464/la464_lock.h @@ -0,0 +1,73 @@ +#ifndef __LA464_LOCK__H__ +#define __LA464_LOCK__H__ +#include <stdint.h> + +// Atomically exchange value at [p] with val, return old p +extern uintptr_t la464_lock_xchg_dd(void* p, uintptr_t val); + +// Atomically exchange value at [p] with val, return old p +extern 
uint32_t la464_lock_xchg_d(void* p, uint32_t val); + +// Atomically store value to [p] only if [p] is NULL. Return old [p] value +extern uint32_t la464_lock_storeifnull_d(void*p, uint32_t val); + +// Atomically store value to [p] only if [p] is NULL. Return old [p] value +extern void* la464_lock_storeifnull(void*p, void* val); + +// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old) +extern void* la464_lock_storeifref(void*p, void* val, void* ref); + +// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old) +extern uint32_t la464_lock_storeifref_d(void*p, uint32_t val, uint32_t ref); + +// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old) +extern uint32_t la464_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref); + +// decrement atomically the byte at [p] (but only if p not 0) +extern void la464_lock_decifnot0b(void*p); + +// atomic store (with memory barrier) +extern void la464_lock_storeb(void*p, uint8_t b); + +// increment atomically the int at [p] only if it was 0. Return the old value of [p] +extern int la464_lock_incif0(void*p); + +// decrement atomically the int at [p] (but only if p not 0) +extern int la464_lock_decifnot0(void*p); + +// atomic store (with memory barrier) +extern void la464_lock_store(void*p, uint32_t v); + +// atomic store (with memory barrier) +extern void la464_lock_store_dd(void*p, uint64_t v); + +// atomic get (with memory barrier) +extern uint32_t la464_lock_get_b(void* p); + +// atomic get (with memory barrier) +extern uint32_t la464_lock_get_d(void* p); + +// atomic get (with memory barrier) +extern void* la464_lock_get_dd(void* p); + +// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 is not. p needs to be aligned +extern int la464_lock_cas_d(void* p, int32_t ref, int32_t val); + +// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 is not. 
p needs to be aligned +extern int la464_lock_cas_dd(void* p, int64_t ref, int64_t val); + +// (mostly) Atomically store val1 and val2 at [p] if old [p] is ref. Return 0 if OK, 1 is not. p needs to be aligned +extern int la464_lock_cas_dq(void* p, uint64_t ref, uint64_t val1, uint64_t val2); + +// Not defined in assembler but in dynarec_rv64_functions +uint8_t extract_byte(uint32_t val, void* address); +uint32_t insert_byte(uint32_t val, uint8_t b, void* address); +uint16_t extract_half(uint32_t val, void* address); +uint32_t insert_half(uint32_t val, uint16_t h, void* address); + +uint8_t la464_lock_xchg_b(void* addr, uint8_t v); +uint16_t la464_lock_xchg_h(void* addr, uint16_t v); +int la464_lock_cas_b(void* p, uint8_t ref, uint8_t val); +int la464_lock_cas_h(void* p, uint16_t ref, uint16_t val); + +#endif //__LA464_LOCK__H__ diff --git a/src/dynarec/la464/la464_next.S b/src/dynarec/la464/la464_next.S new file mode 100644 index 00000000..e2c4924d --- /dev/null +++ b/src/dynarec/la464/la464_next.S @@ -0,0 +1,52 @@ +//la464 update linker table for dynarec +//called with pointer to emu as 1st parameter +//and address of table to as 2nd parameter +//ip is at r12 + +.text +.align 4 + +.extern LinkNext + +.global la464_next + + .8byte 0 // NULL pointer before la464_next, for getDB +la464_next: + // emu is a0 + // IP address is a1 + addi.d $sp, $sp, -(8 * 12) + st.d $a0, $sp, 0 + st.d $a1, $sp, 8 + st.d $r12, $sp, 16 + st.d $r13, $sp, 24 + st.d $r14, $sp, 32 + st.d $r15, $sp, 40 + st.d $r16, $sp, 48 + st.d $r17, $sp, 56 + st.d $r18, $sp, 64 + st.d $r19, $sp, 72 + st.d $r20, $sp, 80 + st.d $r30, $sp, 88 // also save r30(rip) to allow change in LinkNext + + move $a2, $ra // "from" is in ra, so put in a2 + addi.d $a3, $sp, 88 // a3 is address to change rip + // call the function + bl LinkNext + // preserve return value + move $a3, $a0 + // pop regs + ld.d $a0, $sp, 0 + ld.d $a1, $sp, 8 + ld.d $r12, $sp, 16 + ld.d $r13, $sp, 24 + ld.d $r14, $sp, 32 + ld.d $r15, $sp, 40 + 
ld.d $r16, $sp, 48 + ld.d $r17, $sp, 56 + ld.d $r18, $sp, 64 + ld.d $r19, $sp, 72 + ld.d $r20, $sp, 80 + ld.d $r30, $sp, 88 + addi.d $sp, $sp, (8 * 12) + // return offset is jump address + jr $a3 \ No newline at end of file diff --git a/src/dynarec/la464/la464_printer.c b/src/dynarec/la464/la464_printer.c new file mode 100644 index 00000000..5651ec12 --- /dev/null +++ b/src/dynarec/la464/la464_printer.c @@ -0,0 +1,285 @@ +#include <stdint.h> +#include <stddef.h> +#include <string.h> +#include <stdio.h> + +#include "la464_printer.h" +#include "debug.h" + +static const char* Xt[] = {"xZR", "r1", "r2", "sp", "xEmu", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "xRAX", "xRCX", "xRDX", "xRBX", "xRSP", "xRBP", "xRSI", "xRDI", "xR8", "r21", "xR9", "xR10", "xR11", "xR12", "xR13", "xR14", "xR15", "xFlags", "xRIP", "r31"}; + +typedef struct la464_print_s { + int d, j, k, a; + int i, u; +} la464_print_t; + +int isMask(uint32_t opcode, const char* mask, la464_print_t *a) +{ + if(strlen(mask)!=32) { + printf_log(LOG_NONE, "Error: printer mask \"%s\" in not len 32 but %ld\n", mask, strlen(mask)); + return 0; + } + memset(a, 0, sizeof(*a)); + int i = 31; + while(*mask) { + uint8_t v = (opcode>>i)&1; + switch(*mask) { + case '0': if(v!=0) return 0; break; + case '1': if(v!=1) return 0; break; + case 'd': a->d = (a->d<<1) | v; break; + case 'j': a->j = (a->j<<1) | v; break; + case 'k': a->k = (a->k<<1) | v; break; + case 'a': a->a = (a->a<<1) | v; break; + case 'i': a->i = (a->i<<1) | v; break; + case 'u': a->u = (a->u<<1) | v; break; + default: + printf_log(LOG_NONE, "Warning, printer mask use unhandled '%c'\n", *mask); + } + mask++; + --i; + } + + return 1; +} + +int64_t signExtend(uint32_t val, int sz) +{ + int64_t ret = val; + if((val>>(sz-1))&1) + ret |= (0xffffffffffffffffll<<sz); + return ret; +} + +const char* la464_print(uint32_t opcode, uintptr_t addr) +{ + static char buff[200]; + la464_print_t a; + #define Rd a.d + #define Rj a.j + #define Rk a.k + #define Ra a.a + 
#define imm a.i + #define imm_up a.u + // ADD.W + if(isMask(opcode, "00000000000100000kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ADD.W %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // SUB.W + if(isMask(opcode, "00000000000100010kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "SUB.W %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // ADD.D + if(isMask(opcode, "00000000000100001kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ADD.D %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // SUB.D + if(isMask(opcode, "00000000000100011kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "SUB.D %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // ADDI.W + if(isMask(opcode, "0000001010iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ADDI.W %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // ADDI.D + if(isMask(opcode, "0000001011iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ADDI.D %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // ADDU16I.D + if(isMask(opcode, "000100iiiiiiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ADDU16I.D %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // ALSL.W + if(isMask(opcode, "000000000000010iikkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ALSL.W %s, %s, %s, %d", Xt[Rd], Xt[Rj], Xt[Rk], imm); + return buff; + } + // ALSL.WU + if(isMask(opcode, "000000000000011iikkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ALSL.WU %s, %s, %s, %d", Xt[Rd], Xt[Rj], Xt[Rk], imm); + return buff; + } + // ALSL.D + if(isMask(opcode, "000000000010110iikkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ALSL.D %s, %s, %s, %d", Xt[Rd], Xt[Rj], Xt[Rk], imm); + return buff; + } + // LU12I.W + if(isMask(opcode, "0001010iiiiiiiiiiiiiiiiiiiiddddd", &a)) { + snprintf(buff, sizeof(buff), "LU12I.W %s, %d", Xt[Rd], imm); + return buff; + } + // LU32I.D + 
if(isMask(opcode, "0001011iiiiiiiiiiiiiiiiiiiiddddd", &a)) { + snprintf(buff, sizeof(buff), "LU32I.D %s, %d", Xt[Rd], imm); + return buff; + } + // LU52I.D + if(isMask(opcode, "0000001100iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "LU52I.D %s, %s, %d", Xt[Rd], Xt[Rj], imm); + return buff; + } + // PCADDI + if(isMask(opcode, "0001100iiiiiiiiiiiiiiiiiiiiddddd", &a)) { + snprintf(buff, sizeof(buff), "PCADDI %s, %d", Xt[Rd], imm); + return buff; + } + // PCADDU12I + if(isMask(opcode, "0001101iiiiiiiiiiiiiiiiiiiiddddd", &a)) { + snprintf(buff, sizeof(buff), "PCADDU12I %s, %d", Xt[Rd], imm); + return buff; + } + // PCADDU18I + if(isMask(opcode, "0001110iiiiiiiiiiiiiiiiiiiiddddd", &a)) { + snprintf(buff, sizeof(buff), "PCADDU18I %s, %d", Xt[Rd], imm); + return buff; + } + // PCALAU12I + if(isMask(opcode, "0001111iiiiiiiiiiiiiiiiiiiiddddd", &a)) { + snprintf(buff, sizeof(buff), "PCALAU12I %s, %d", Xt[Rd], imm); + return buff; + } + // AND + if(isMask(opcode, "00000000000101001kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "AND %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // OR + if(isMask(opcode, "00000000000101010kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "OR %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // NOR + if(isMask(opcode, "00000000000101000kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "NOR %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // XOR + if(isMask(opcode, "00000000000101011kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "XOR %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // ANDN + if(isMask(opcode, "00000000000101101kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ANDN %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // ORN + if(isMask(opcode, "00000000000101100kkkkkjjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ORN %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]); + return buff; + } + // ANDI + if(isMask(opcode, 
"0000001101iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ANDI %s, %s, 0x%x", Xt[Rd], Xt[Rj], imm); + return buff; + } + // ORI + if(isMask(opcode, "0000001110iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ORI %s, %s, 0x%x", Xt[Rd], Xt[Rj], imm); + return buff; + } + // XORI + if(isMask(opcode, "0000001111iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "XORI %s, %s, 0x%x", Xt[Rd], Xt[Rj], imm); + return buff; + } + // SLLI.D + if(isMask(opcode, "0000000001000001iiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "SLLI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm); + return buff; + } + // SRLI.D + if(isMask(opcode, "0000000001000101iiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "SRLI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm); + return buff; + } + // SRAI.D + if(isMask(opcode, "0000000001001001iiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "SRAI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm); + return buff; + } + // ROTRI.D + if(isMask(opcode, "0000000001001101iiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ROTRI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm); + return buff; + } + // BEQZ + if(isMask(opcode, "010000iiiiiiiiiiiiiiiijjjjjuuuuu", &a)) { + snprintf(buff, sizeof(buff), "BEQZ %s, %d", Xt[Rj], imm + (imm_up << 16)); + return buff; + } + // BNEZ + if(isMask(opcode, "010001iiiiiiiiiiiiiiiijjjjjuuuuu", &a)) { + snprintf(buff, sizeof(buff), "BNEZ %s, %d", Xt[Rj], imm + (imm_up << 16)); + return buff; + } + // JIRL + if(isMask(opcode, "010011iiiiiiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "JIRL %s, %s, %d", Xt[Rd], Xt[Rj], imm); + return buff; + } + // LD.B + if(isMask(opcode, "0010100000iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "LD.B %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // LD.H + if(isMask(opcode, "0010100001iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "LD.H %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + 
} + // LD.W + if(isMask(opcode, "0010100010iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "LD.W %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // LD.D + if(isMask(opcode, "0010100011iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "LD.D %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // LD.BU + if(isMask(opcode, "0010101000iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "LD.BU %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // LD.HU + if(isMask(opcode, "0010101001iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "LD.HU %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // LD.WU + if(isMask(opcode, "0010101010iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "LD.WU %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // ST.B + if(isMask(opcode, "0010100100iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ST.B %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // ST.H + if(isMask(opcode, "0010100101iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ST.H %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // ST.W + if(isMask(opcode, "0010100110iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ST.W %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + // ST.D + if(isMask(opcode, "0010100111iiiiiiiiiiiijjjjjddddd", &a)) { + snprintf(buff, sizeof(buff), "ST.D %s, %s, %d", Xt[Rd], Xt[Rj], signExtend(imm, 12)); + return buff; + } + snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode)); + return buff; +} \ No newline at end of file diff --git a/src/dynarec/la464/la464_printer.h b/src/dynarec/la464/la464_printer.h new file mode 100644 index 00000000..e3ad8b2c --- /dev/null +++ b/src/dynarec/la464/la464_printer.h @@ -0,0 +1,6 @@ +#ifndef _LA464_PRINTER_H_ +#define _LA464_PRINTER_H_ + 
+const char* la464_print(uint32_t opcode, uint64_t addr); + +#endif //_LA464_PRINTER_H_ diff --git a/src/dynarec/la464/la464_prolog.S b/src/dynarec/la464/la464_prolog.S new file mode 100644 index 00000000..aafe4dc7 --- /dev/null +++ b/src/dynarec/la464/la464_prolog.S @@ -0,0 +1,60 @@ +//loongarch prologue for dynarec +//Save stuff, prepare stack and register +//called with pointer to emu as 1st parameter +//and address to jump to as 2nd parameter + +.text +.align 4 + +.global la464_prolog +la464_prolog: + //save all 18 used register + addi.d $sp, $sp, -(8 * 19) + st.d $r1, $sp, (8 * 0) //save ra + st.d $r22, $sp, (8 * 1) //save fp + // save s0 - s8 + st.d $r23, $sp, (8 * 2) + st.d $r24, $sp, (8 * 3) + st.d $r25, $sp, (8 * 4) + st.d $r26, $sp, (8 * 5) + st.d $r27, $sp, (8 * 6) + st.d $r28, $sp, (8 * 7) + st.d $r29, $sp, (8 * 8) + st.d $r30, $sp, (8 * 9) + st.d $r31, $sp, (8 * 10) + // save fs0 - fs7 + fst.d $f24, $sp, (8 * 11) + fst.d $f25, $sp, (8 * 12) + fst.d $f26, $sp, (8 * 13) + fst.d $f27, $sp, (8 * 14) + fst.d $f28, $sp, (8 * 15) + fst.d $f29, $sp, (8 * 16) + fst.d $f30, $sp, (8 * 17) + fst.d $f31, $sp, (8 * 18) + //setup emu -> register + ld.d $r12, $r4, (8 * 0) + ld.d $r13, $r4, (8 * 1) + ld.d $r14, $r4, (8 * 2) + ld.d $r15, $r4, (8 * 3) + ld.d $r16, $r4, (8 * 4) + ld.d $r17, $r4, (8 * 5) + ld.d $r18, $r4, (8 * 6) + ld.d $r19, $r4, (8 * 7) + ld.d $r23, $r4, (8 * 8) + ld.d $r24, $r4, (8 * 9) + ld.d $r25, $r4, (8 * 10) + ld.d $r26, $r4, (8 * 11) + ld.d $r27, $r4, (8 * 12) + ld.d $r28, $r4, (8 * 13) + ld.d $r29, $r4, (8 * 14) + ld.d $r30, $r4, (8 * 15) + ld.d $r31, $r4, (8 * 16) //xFlags + ld.d $r20, $r4, (8 * 17) //xRIP + ld.d $r11, $r4, 552 // grab an old value of emu->xSPSave + st.d $sp, $r4, 552 // save current sp to emu->xSPSave + // push sentinel onto the stack + st.d $r11, $sp, -16 + st.d $r0, $sp, -8 + addi.d $sp, $sp, -16 + //jump to function + jirl $r0, $a1, 0 diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h index 70a8eaa8..66cb026f 
100644 --- a/src/dynarec/native_lock.h +++ b/src/dynarec/native_lock.h @@ -77,6 +77,44 @@ #define native_lock_get_d(A) rv64_lock_get_d(A) #define native_lock_get_dd(A) rv64_lock_get_dd(A) +#elif defined(LA464) +#include "la464/la464_lock.h" + +#define USE_CAS + +// no byte or 2-bytes atomic access on LA464 +#define native_lock_xchg_dd(A, B) la464_lock_xchg_dd(A, B) +#define native_lock_xchg_d(A, B) la464_lock_xchg_d(A, B) +#define native_lock_xchg_h(A, B) la464_lock_xchg_h(A, B) +#define native_lock_xchg_b(A, B) la464_lock_xchg_b(A, B) +#define native_lock_storeifref(A, B, C) la464_lock_storeifref(A, B, C) +#define native_lock_storeifref_d(A, B, C) la464_lock_storeifref_d(A, B, C) +#define native_lock_storeifref2_d(A, B, C) la464_lock_storeifref2_d(A, B, C) +#define native_lock_storeifnull(A, B) la464_lock_storeifnull(A, B) +#define native_lock_storeifnull_d(A, B) la464_lock_storeifnull_d(A, B) +#define native_lock_decifnot0b(A) la464_lock_decifnot0b(A) +#define native_lock_storeb(A, B) la464_lock_storeb(A, B) +#define native_lock_incif0(A) la464_lock_incif0(A) +#define native_lock_decifnot0(A) la464_lock_decifnot0(A) +#define native_lock_store(A, B) la464_lock_store(A, B) +#define native_lock_store_dd(A, B) la464_lock_store_dd(A, B) + +// there is no atomic move on 8bytes, so faking it +#define native_lock_read_b(A) tmpcas=*(uint8_t*)(A) +#define native_lock_write_b(A, B) la464_lock_cas_b(A, tmpcas, B) +// there is no atomic move on 16bytes, so faking it +#define native_lock_read_h(A) tmpcas=*(uint16_t*)(A) +#define native_lock_write_h(A, B) la464_lock_cas_h(A, tmpcas, B) +#define native_lock_read_d(A) tmpcas=*(uint32_t*)(A) +#define native_lock_write_d(A, B) la464_lock_cas_d(A, tmpcas, B) +#define native_lock_read_dd(A) tmpcas=*(uint64_t*)(A) +#define native_lock_write_dd(A, B) la464_lock_cas_dd(A, tmpcas, B) +#define native_lock_read_dq(A, B, C) *A=tmpcas=((uint64_t*)(C))[0]; *B=((uint64_t*)(C))[1]; +#define native_lock_write_dq(A, B, C) la464_lock_cas_dq(C, A, 
tmpcas, B) +#define native_lock_get_b(A) la464_lock_get_b(A) +#define native_lock_get_d(A) la464_lock_get_d(A) +#define native_lock_get_dd(A) la464_lock_get_dd(A) + #else #error Unsupported architecture #endif diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h index 90c9b7b0..4548d7e9 100644 --- a/src/emu/x64emu_private.h +++ b/src/emu/x64emu_private.h @@ -74,6 +74,9 @@ typedef struct x64emu_s { #ifdef RV64 // it would be better to use a dedicated register for this like arm64 xSavedSP, but we're running of of free registers. uintptr_t xSPSave; // sp base value of current dynarec frame, used by call/ret optimization to reset stack when unmatch. #endif + #ifdef LA464 // it would be better to use a dedicated register for this like arm64 xSavedSP, but we're running of of free registers. + uintptr_t xSPSave; // sp base value of current dynarec frame, used by call/ret optimization to reset stack when unmatch. + #endif fpu_ld_t fpu_ld[8]; // for long double emulation / 80bits fld fst fpu_ll_t fpu_ll[8]; // for 64bits fild / fist sequence fpu_p_reg_t p_regs[8]; diff --git a/src/include/dynarec_la464.h b/src/include/dynarec_la464.h new file mode 100644 index 00000000..2adb3e98 --- /dev/null +++ b/src/include/dynarec_la464.h @@ -0,0 +1,10 @@ +#ifndef __DYNAREC_LA464_H_ +#define __DYNAREC_LA464_H_ + +typedef struct dynablock_s dynablock_t; +typedef struct x64emu_s x64emu_t; + +void CancelBlock64(void); +void* FillBlock64(dynablock_t* block, uintptr_t addr); + +#endif //__DYNAREC_LA464_H_ \ No newline at end of file diff --git a/src/libtools/signals.c b/src/libtools/signals.c index fac0f478..c3c1c572 100644 --- a/src/libtools/signals.c +++ b/src/libtools/signals.c @@ -1684,6 +1684,21 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for for (int i=-4; i<4; ++i) { printf_log(log_minimum, "%sRSP%c0x%02x:0x%016lx", (i%4)?" 
":"\n", i<0?'-':'+', abs(i)*8, *(uintptr_t*)(rsp+i*8)); } +#elif defined(LA464) + if(db) { + shown_regs = 1; + for (int i=0; i<16; ++i) { + if(!(i%4)) printf_log(log_minimum, "\n"); + printf_log(log_minimum, "%s:0x%016llx ", reg_name[i], p->uc_mcontext.__gregs[16+i]); + } + printf_log(log_minimum, "\n"); + for (int i=0; i<6; ++i) + printf_log(log_minimum, "%s:0x%04x ", seg_name[i], emu->segs[i]); + } + if(rsp!=addr && getProtection((uintptr_t)rsp-4*8) && getProtection((uintptr_t)rsp+4*8)) + for (int i=-4; i<4; ++i) { + printf_log(log_minimum, "%sRSP%c0x%02x:0x%016lx", (i%4)?" ":"\n", i<0?'-':'+', abs(i)*8, *(uintptr_t*)(rsp+i*8)); + } #else #warning TODO #endif |