about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Haichen Wu <www.wxmqq@gmail.com> 2024-02-28 23:10:13 +0800
committer GitHub <noreply@github.com> 2024-02-28 16:10:13 +0100
commit bdcfd697950197691eff44794d6e8a5f0e1b9a54 (patch)
tree 6d557fa946fe5550b4e1215c15e3ed92477acfd6 /src
parent dbe1bb0ec47b1857d267029a5a8f497829f9f2ad (diff)
download box64-bdcfd697950197691eff44794d6e8a5f0e1b9a54.tar.gz
box64-bdcfd697950197691eff44794d6e8a5f0e1b9a54.zip
LARCH64 dynarec (#1295)
* LARCH64_DYNAREC dynarec

* [LARCH64_DYNAREC] Change mapping for the registers
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la464/dynarec_la464_00.c 115
-rw-r--r-- src/dynarec/la464/dynarec_la464_functions.c 158
-rw-r--r-- src/dynarec/la464/dynarec_la464_functions.h 19
-rw-r--r-- src/dynarec/la464/dynarec_la464_helper.c 431
-rw-r--r-- src/dynarec/la464/dynarec_la464_helper.h 246
-rw-r--r-- src/dynarec/la464/dynarec_la464_jmpnext.c 13
-rw-r--r-- src/dynarec/la464/dynarec_la464_pass0.h 47
-rw-r--r-- src/dynarec/la464/dynarec_la464_pass1.h 11
-rw-r--r-- src/dynarec/la464/dynarec_la464_pass2.h 26
-rw-r--r-- src/dynarec/la464/dynarec_la464_pass3.h 29
-rw-r--r-- src/dynarec/la464/dynarec_la464_private.h 88
-rw-r--r-- src/dynarec/la464/la464_emitter.h 369
-rw-r--r-- src/dynarec/la464/la464_epilog.S 55
-rw-r--r-- src/dynarec/la464/la464_lock.S 186
-rw-r--r-- src/dynarec/la464/la464_lock.h 73
-rw-r--r-- src/dynarec/la464/la464_next.S 52
-rw-r--r-- src/dynarec/la464/la464_printer.c 285
-rw-r--r-- src/dynarec/la464/la464_printer.h 6
-rw-r--r-- src/dynarec/la464/la464_prolog.S 60
-rw-r--r-- src/dynarec/native_lock.h 38
-rw-r--r-- src/emu/x64emu_private.h 3
-rw-r--r-- src/include/dynarec_la464.h 10
-rw-r--r-- src/libtools/signals.c 15
23 files changed, 2335 insertions, 0 deletions
diff --git a/src/dynarec/la464/dynarec_la464_00.c b/src/dynarec/la464/dynarec_la464_00.c
new file mode 100644
index 00000000..28178cb9
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_00.c
@@ -0,0 +1,115 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+#include <signal.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "bridge.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "custommem.h"
+
+#include "la464_printer.h"
+#include "dynarec_la464_private.h"
+#include "dynarec_la464_functions.h"
+#include "dynarec_la464_helper.h"
+
+int isSimpleWrapper(wrapper_t fun);
+int isRetX87Wrapper(wrapper_t fun);
+
+// Decode one x86-64 instruction whose opcode byte is read at addr and hand it to the emitter macros.
+// Handled here: PUSH reg (0x50-0x57), POP reg (0x58-0x5F), MOV Ed,Gd (0x89) and LEA Gd,Ed (0x8D);
+// every other opcode (and LEA with a register operand) goes through DEFAULT.
+// Returns addr advanced past the bytes consumed while decoding.
+uintptr_t dynarec64_00(dynarec_la464_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
+{
+    uint8_t nextop, opcode;
+    uint8_t gd, ed;
+    int8_t i8;
+    int32_t i32, tmp;
+    int64_t i64, j64;
+    uint8_t u8;
+    uint8_t gb1, gb2, eb1, eb2;
+    uint32_t u32;
+    uint64_t u64;
+    uint8_t wback, wb1, wb2, wb;
+    int64_t fixedaddress;
+    int unscaled;
+    int lock;
+    int cacheupd = 0;
+
+    opcode = F8;
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(j64);
+    MAYUSE(wb);
+    MAYUSE(lock);
+    MAYUSE(cacheupd);
+
+    switch (opcode) {
+        case 0x50:
+        case 0x51:
+        case 0x52:
+        case 0x53:
+        case 0x54:
+        case 0x55:
+        case 0x56:
+        case 0x57:
+            INST_NAME("PUSH reg");
+            gd = TO_LA464((opcode & 0x07) + (rex.b << 3)); // register index 0-15: low 3 opcode bits, rex.b as bit 3
+            PUSH1z(gd);
+            break;
+        case 0x58:
+        case 0x59:
+        case 0x5A:
+        case 0x5B:
+        case 0x5C:
+        case 0x5D:
+        case 0x5E:
+        case 0x5F:
+            INST_NAME("POP reg");
+            gd = TO_LA464((opcode & 0x07) + (rex.b << 3)); // same register selection as PUSH
+            POP1z(gd);
+            break;
+        case 0x89:
+            INST_NAME("MOV Ed, Gd");
+            nextop = F8;
+            GETGD;
+            if (MODREG) { // reg <= reg
+                MVxw(TO_LA464((nextop & 7) + (rex.b << 3)), gd);
+            } else { // mem <= reg
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
+                if (rex.w) {
+                    ST_D(gd, ed, fixedaddress); // 64-bit store
+                } else {
+                    ST_W(gd, ed, fixedaddress); // 32-bit store
+                }
+                SMWRITELOCK(lock);
+            }
+            break;
+        case 0x8D:
+            INST_NAME("LEA Gd, Ed");
+            nextop = F8;
+            GETGD;
+            if (MODREG) { // reg <= reg? that's an invalid operation
+                DEFAULT;
+            } else { // reg <= effective address of the memory operand (no memory access)
+                // i12 = 0: geted folds any non-zero displacement into ed, so fixedaddress is 0 here
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0);
+                MV(gd, ed);
+                if (!rex.w || rex.is32bits) {
+                    ZEROUP(gd); // truncate the higher 32bits as asked
+                }
+            }
+            break;
+        default:
+            DEFAULT;
+    }
+
+    return addr;
+}
\ No newline at end of file
diff --git a/src/dynarec/la464/dynarec_la464_functions.c b/src/dynarec/la464/dynarec_la464_functions.c
new file mode 100644
index 00000000..2467948f
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_functions.c
@@ -0,0 +1,158 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <errno.h>
+#include <string.h>
+#include <math.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "emu/x87emu_private.h"
+#include "x64trace.h"
+#include "signals.h"
+#include "dynarec_la464.h"
+#include "dynarec_la464_private.h"
+#include "dynarec_la464_functions.h"
+#include "custommem.h"
+#include "bridge.h"
+
+// Reset scratch regs counter
+// Not implemented yet for LA464: the body is empty and dyn is left untouched.
+void fpu_reset_scratch(dynarec_la464_t* dyn)
+{
+    // TODO
+}
+
+// Dump helper: when box64_dynarec_dump is set, print the decoded x64 instruction `name` for
+// instruction number ninst followed by one log line with its bookkeeping (native address, number of
+// emitted 4-byte opcodes, barrier, flag state, strong-memory state, predecessors, jump target, last_ip).
+// Does nothing when box64_dynarec_dump is 0.
+void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex)
+{
+    if (box64_dynarec_dump) {
+        // pick the 32-bit or 64-bit decoder according to the current mode
+        printf_x64_instruction(rex.is32bits ? my_context->dec32 : my_context->dec, &dyn->insts[ninst].x64, name);
+        // the line is colored (ANSI escape) only when box64_dynarec_dump > 1
+        dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d",
+            (box64_dynarec_dump > 1) ? "\e[32m" : "",
+            (void*)(dyn->native_start + dyn->insts[ninst].address),
+            dyn->insts[ninst].size / 4,
+            ninst,
+            dyn->insts[ninst].x64.barrier,
+            dyn->insts[ninst].x64.state_flags,
+            dyn->f.pending,
+            dyn->f.dfnone,
+            dyn->insts[ninst].x64.may_set ? "may" : "set",
+            dyn->insts[ninst].x64.set_flags,
+            dyn->insts[ninst].x64.gen_flags,
+            dyn->insts[ninst].x64.use_flags,
+            dyn->insts[ninst].x64.need_before,
+            dyn->insts[ninst].x64.need_after,
+            dyn->smread, dyn->smwrite);
+        if (dyn->insts[ninst].pred_sz) {
+            // list of predecessor instruction indexes, '/'-separated
+            dynarec_log(LOG_NONE, ", pred=");
+            for (int ii = 0; ii < dyn->insts[ninst].pred_sz; ++ii)
+                dynarec_log(LOG_NONE, "%s%d", ii ? "/" : "", dyn->insts[ninst].pred[ii]);
+        }
+        if (dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts >= 0)
+            dynarec_log(LOG_NONE, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts);
+        if (dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts == -1)
+            dynarec_log(LOG_NONE, ", jmp=out");
+        if (dyn->last_ip)
+            dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip);
+        // for (int ii = 0; ii < 24; ++ii) {
+        //     switch (dyn->insts[ninst].n.neoncache[ii].t) {
+        //         case NEON_CACHE_ST_D: dynarec_log(LOG_NONE, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
+        //         case NEON_CACHE_ST_F: dynarec_log(LOG_NONE, " S%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
+        //         case NEON_CACHE_ST_I64: dynarec_log(LOG_NONE, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
+        //         case NEON_CACHE_MM: dynarec_log(LOG_NONE, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
+        //         case NEON_CACHE_XMMW: dynarec_log(LOG_NONE, " Q%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
+        //         case NEON_CACHE_XMMR: dynarec_log(LOG_NONE, " Q%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
+        //         case NEON_CACHE_SCR: dynarec_log(LOG_NONE, " D%d:%s", ii, getCacheName(dyn->insts[ninst].n.neoncache[ii].t, dyn->insts[ninst].n.neoncache[ii].n)); break;
+        //         case NEON_CACHE_NONE:
+        //         default: break;
+        //     }
+        // }
+        // if (dyn->n.stack || dyn->insts[ninst].n.stack_next || dyn->insts[ninst].n.x87stack)
+        //     dynarec_log(LOG_NONE, " X87:%d/%d(+%d/-%d)%d", dyn->n.stack, dyn->insts[ninst].n.stack_next, dyn->insts[ninst].n.stack_push, dyn->insts[ninst].n.stack_pop, dyn->insts[ninst].n.x87stack);
+        // if (dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2)
+        //     dynarec_log(LOG_NONE, " %s:%d/%d", dyn->insts[ninst].n.swapped ? "SWP" : "CMB", dyn->insts[ninst].n.combined1, dyn->insts[ninst].n.combined2);
+        // close the line and reset the color if one was set
+        dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump > 1) ? "\e[m" : "");
+    }
+}
+
+// CAS
+// Return the byte of the 32-bit word `val` selected by the two low bits of `address`
+// (byte lane = address & 3, lane 0 being the least significant byte).
+uint8_t extract_byte(uint32_t val, void* address)
+{
+    const unsigned shift = (unsigned)(((uintptr_t)address) & 3) * 8u;
+    return (uint8_t)(val >> shift);
+}
+// Return `val` with the byte lane selected by the two low bits of `address` replaced by `b`;
+// the other three bytes are preserved.
+uint32_t insert_byte(uint32_t val, uint8_t b, void* address){
+    int idx = (((uintptr_t)address)&3)*8;
+    // unsigned mask: shifting the int constant 0xff by 24 would overflow a signed int (undefined behavior)
+    val&=~(0xffu<<idx);
+    val|=(((uint32_t)b)<<idx);
+    return val;
+}
+
+// will go badly if address is unaligned
+// Return the 16 bits of the 32-bit word `val` that start at byte lane (address & 3).
+// With lane 3 only the top byte of `val` is available, so the result is incomplete.
+uint16_t extract_half(uint32_t val, void* address)
+{
+    const unsigned shift = (unsigned)(((uintptr_t)address) & 3) * 8u;
+    return (uint16_t)(val >> shift);
+}
+// Return `val` with the 16 bits starting at byte lane (address & 3) replaced by `h`.
+// Bits that would fall beyond bit 31 (lane 3) are dropped, so the address should be 2-byte aligned.
+uint32_t insert_half(uint32_t val, uint16_t h, void* address){
+    int idx = (((uintptr_t)address)&3)*8;
+    // unsigned mask: shifting the int constant 0xffff by 16 or 24 would overflow a signed int (undefined behavior)
+    val&=~(0xffffu<<idx);
+    val|=(((uint32_t)h)<<idx);
+    return val;
+}
+
+// Exchange one byte at addr with val and return the previous byte.
+// Works on the aligned 32-bit word containing addr: read it, build the word with the byte replaced,
+// and retry for as long as la464_lock_cas_d reports a non-zero result.
+uint8_t la464_lock_xchg_b(void* addr, uint8_t val)
+{
+    uint32_t* word = (uint32_t*)(((uintptr_t)addr)&~3);
+    uint32_t old;
+    for (;;) {
+        old = *word;
+        if (!la464_lock_cas_d(word, old, insert_byte(old, val, addr)))
+            break;
+    }
+    return extract_byte(old, addr);
+}
+
+// Exchange one half-word at addr with val and return the previous half-word.
+// Works on the aligned 32-bit word containing addr: read it, build the word with the half replaced,
+// and retry for as long as la464_lock_cas_d reports a non-zero result.
+uint16_t la464_lock_xchg_h(void* addr, uint16_t val)
+{
+    uint32_t* word = (uint32_t*)(((uintptr_t)addr)&~3);
+    uint32_t old;
+    for (;;) {
+        old = *word;
+        if (!la464_lock_cas_d(word, old, insert_half(old, val, addr)))
+            break;
+    }
+    return extract_half(old, addr);
+}
+
+// Byte compare-and-swap built on la464_lock_cas_d applied to the aligned word containing addr.
+// Both the expected word and the new word are derived from the word currently in memory, so only the
+// addressed byte is compared against ref and replaced by val (passing the bare 8-bit ref as the
+// expected word could only match when the three neighbouring bytes are zero).
+// Returns the result of la464_lock_cas_d; the xchg helpers in this file treat non-zero as "retry".
+// The swap also fails if another byte of the word changes between the read and the CAS.
+int la464_lock_cas_b(void* addr, uint8_t ref, uint8_t val)
+{
+    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
+    uint32_t tmp = *aligned;
+    return la464_lock_cas_d(aligned, insert_byte(tmp, ref, addr), insert_byte(tmp, val, addr));
+}
+
+// Half-word compare-and-swap built on la464_lock_cas_d applied to the aligned word containing addr.
+// Both the expected word and the new word are derived from the word currently in memory, so only the
+// addressed half-word is compared against ref and replaced by val (passing the bare 16-bit ref as the
+// expected word could only match when the other half of the word is zero).
+// Returns the result of la464_lock_cas_d; the xchg helpers in this file treat non-zero as "retry".
+// The swap also fails if the other half of the word changes between the read and the CAS.
+int la464_lock_cas_h(void* addr, uint16_t ref, uint16_t val)
+{
+    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);
+    uint32_t tmp = *aligned;
+    return la464_lock_cas_d(aligned, insert_half(tmp, ref, addr), insert_half(tmp, val, addr));
+}
+
+// Log one emitted native opcode: its raw 32-bit value in hex followed by the text returned by
+// la464_print() for it. ninst is not used.
+void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode)
+{
+    dynarec_log(LOG_NONE, "\t%08x\t%s\n", opcode, la464_print(opcode, (uintptr_t)dyn->block));
+}
+
+// Not implemented yet for LA464: the body is empty and dyn is left untouched.
+void fpu_reset(dynarec_la464_t* dyn)
+{
+    // TODO
+}
+
+// Not implemented yet for LA464: the body is empty, dyn and ninst are ignored.
+void fpu_reset_ninst(dynarec_la464_t* dyn, int ninst)
+{
+    // TODO
+}
\ No newline at end of file
diff --git a/src/dynarec/la464/dynarec_la464_functions.h b/src/dynarec/la464/dynarec_la464_functions.h
new file mode 100644
index 00000000..6e504f9b
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_functions.h
@@ -0,0 +1,19 @@
+#ifndef __DYNAREC_LA464_FUNCTIONS_H__
+#define __DYNAREC_LA464_FUNCTIONS_H__
+
+#include "../dynarec_native_functions.h"
+
+typedef struct x64emu_s x64emu_t;
+typedef struct dynarec_rv64_s dynarec_rv64_t; // NOTE(review): looks like a leftover from the rv64 header; this file only uses dynarec_la464_t — confirm and switch to the la464 struct tag
+
+// Reset scratch regs counter
+void fpu_reset_scratch(dynarec_la464_t* dyn);
+
+// Dump helpers (print the x64 instruction name / one emitted native opcode)
+void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex);
+void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode);
+
+// reset the cache
+void fpu_reset(dynarec_native_t* dyn);
+void fpu_reset_ninst(dynarec_native_t* dyn, int ninst);
+
+#endif //__DYNAREC_LA464_FUNCTIONS_H__
diff --git a/src/dynarec/la464/dynarec_la464_helper.c b/src/dynarec/la464/dynarec_la464_helper.c
new file mode 100644
index 00000000..43ba7499
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_helper.c
@@ -0,0 +1,431 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <errno.h>
+#include <assert.h>
+#include <string.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "../dynablock_private.h"
+#include "custommem.h"
+
+#include "la464_printer.h"
+#include "dynarec_la464_private.h"
+#include "dynarec_la464_functions.h"
+#include "dynarec_la464_helper.h"
+
+static uintptr_t geted_32(dynarec_la464_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, int* l, int i12);
+
+/* Decode the ModRM byte `nextop` (plus optional SIB and displacement bytes read at addr) of a memory
+   operand and set *ed to the LA464 register that holds its address: either the mapped guest register
+   itself when nothing has to be computed, or `hint` (x2 when hint is 0) after emitting the computation.
+   *fixaddress receives an optional displacement, in the range [-2048, maxval], that the caller still
+   has to apply in its load/store; it is only non-zero when i12 is non-zero.
+   scratch: temporary register that may be overwritten.
+   l: NULL, LOCK_LOCK (the absolute address is registered with addLockAddress), or a pointer set to 1
+      when the absolute address is reported by isLockAddress (0 otherwise).
+   delta: extra offset added to addr when computing RIP-relative addresses.
+   Returns addr advanced past the bytes consumed. In 32-bit mode the work is delegated to geted_32. */
+uintptr_t geted(dynarec_la464_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta)
+{
+    MAYUSE(dyn);
+    MAYUSE(ninst);
+    MAYUSE(delta);
+
+    if (rex.is32bits)
+        return geted_32(dyn, addr, ninst, nextop, ed, hint, scratch, fixaddress, l, i12);
+
+    // lock: 0 = no lock tracking, 1 = record lock addresses, 2 = query and report through *l
+    int lock = l ? ((l == LOCK_LOCK) ? 1 : 2) : 0;
+    if (lock == 2)
+        *l = 0;
+    uint8_t ret = x2;
+    *fixaddress = 0;
+    if (hint > 0) ret = hint;
+    int maxval = 2047;
+    if (i12 > 1)
+        maxval -= i12; // i12 > 1 shrinks the largest displacement left to the caller
+    MAYUSE(scratch);
+    if (!(nextop & 0xC0)) { // mod == 0: no displacement byte(s), except for the special base encodings
+        if ((nextop & 7) == 4) { // SIB byte follows
+            uint8_t sib = F8;
+            int sib_reg = ((sib >> 3) & 7) + (rex.x << 3);
+            int sib_reg2 = (sib & 0x7) + (rex.b << 3);
+            if ((sib & 0x7) == 5) { // no base register: 32-bit displacement instead
+                int64_t tmp = F32S;
+                if (sib_reg != 4) {
+                    if (tmp && ((tmp < -2048) || (tmp > maxval) || !i12)) {
+                        MOV64x(scratch, tmp);
+                        ADDSL(ret, scratch, TO_LA464(sib_reg), sib >> 6, ret);
+                    } else {
+                        if (sib >> 6) {
+                            SLLI_D(ret, TO_LA464(sib_reg), (sib >> 6));
+                        } else {
+                            ret = TO_LA464(sib_reg);
+                        }
+                        *fixaddress = tmp;
+                    }
+                } else { // no index either: absolute address
+                    switch (lock) {
+                        case 1: addLockAddress(tmp); break;
+                        case 2:
+                            if (isLockAddress(tmp)) *l = 1;
+                            break;
+                    }
+                    MOV64x(ret, tmp);
+                }
+            } else {
+                if (sib_reg != 4) {
+                    ADDSL(ret, TO_LA464(sib_reg2), TO_LA464(sib_reg), sib >> 6, scratch);
+                } else {
+                    ret = TO_LA464(sib_reg2);
+                }
+            }
+        } else if ((nextop & 7) == 5) { // RIP-relative with 32-bit displacement
+            int64_t tmp = F32S64;
+            // distance between the address of the next instruction and the value last loaded in xRIP (0 if unknown)
+            int64_t adj = dyn->last_ip ? ((addr + delta) - dyn->last_ip) : 0;
+            if (i12 && adj && (tmp + adj >= -2048) && (tmp + adj <= maxval)) {
+                ret = xRIP;
+                *fixaddress = tmp + adj;
+            } else if (i12 && (tmp >= -2048) && (tmp <= maxval)) {
+                GETIP(addr + delta);
+                ret = xRIP;
+                *fixaddress = tmp;
+            } else if (adj && (tmp + adj >= -2048) && (tmp + adj <= maxval)) {
+                ADDI_D(ret, xRIP, tmp + adj);
+            } else if ((tmp >= -2048) && (tmp <= maxval)) {
+                GETIP(addr + delta);
+                ADDI_D(ret, xRIP, tmp);
+            } else if (tmp + addr + delta < 0x100000000LL) {
+                MOV64x(ret, tmp + addr + delta);
+            } else {
+                if (adj) {
+                    MOV64x(ret, tmp + adj);
+                } else {
+                    MOV64x(ret, tmp);
+                    GETIP(addr + delta);
+                }
+                ADD_D(ret, ret, xRIP);
+            }
+            switch (lock) {
+                case 1: addLockAddress(addr + delta + tmp); break;
+                case 2:
+                    if (isLockAddress(addr + delta + tmp)) *l = 1;
+                    break;
+            }
+        } else { // plain register indirect
+            ret = TO_LA464((nextop & 7) + (rex.b << 3));
+        }
+    } else { // displacement follows: 32 bits when bit 7 of nextop is set, 8 bits otherwise
+        int64_t i64;
+        uint8_t sib = 0;
+        int sib_reg = 0;
+        if ((nextop & 7) == 4) {
+            sib = F8;
+            sib_reg = ((sib >> 3) & 7) + (rex.x << 3);
+        }
+        int sib_reg2 = (sib & 0x07) + (rex.b << 3);
+        if (nextop & 0x80)
+            i64 = F32S;
+        else
+            i64 = F8S;
+        if (i64 == 0 || ((i64 >= -2048) && (i64 <= 2047) && i12)) {
+            // displacement is left to the caller through *fixaddress
+            *fixaddress = i64;
+            if ((nextop & 7) == 4) {
+                if (sib_reg != 4) {
+                    ADDSL(ret, TO_LA464(sib_reg2), TO_LA464(sib_reg), sib >> 6, scratch);
+                } else {
+                    ret = TO_LA464(sib_reg2);
+                }
+            } else {
+                ret = TO_LA464((nextop & 0x07) + (rex.b << 3));
+            }
+        } else {
+            if (i64 >= -2048 && i64 <= 2047) {
+                // small displacement: added with a single ADDI_D
+                if ((nextop & 7) == 4) {
+                    if (sib_reg != 4) {
+                        ADDSL(scratch, TO_LA464(sib_reg2), TO_LA464(sib_reg), sib >> 6, scratch);
+                    } else {
+                        scratch = TO_LA464(sib_reg2);
+                    }
+                } else {
+                    scratch = TO_LA464((nextop & 0x07) + (rex.b << 3));
+                }
+                ADDI_D(ret, scratch, i64);
+            } else {
+                // large displacement: loaded in scratch first
+                MOV64x(scratch, i64);
+                if ((nextop & 7) == 4) {
+                    if (sib_reg != 4) {
+                        ADD_D(scratch, scratch, TO_LA464(sib_reg2));
+                        ADDSL(ret, scratch, TO_LA464(sib_reg), sib >> 6, ret);
+                    } else {
+                        PASS3(int tmp = TO_LA464(sib_reg2));
+                        ADD_D(ret, tmp, scratch);
+                    }
+                } else {
+                    PASS3(int tmp = TO_LA464((nextop & 0x07) + (rex.b << 3)));
+                    ADD_D(ret, tmp, scratch);
+                }
+            }
+        }
+    }
+    *ed = ret;
+    return addr;
+}
+
+// 32-bit mode variant of geted (called by geted when rex.is32bits is set): same contract for ed,
+// hint, scratch, fixaddress, l and i12, but register numbers never use REX extension bits and
+// address arithmetic is emitted with the 32-bit ADD_W / ADDI_W / MOV32w forms.
+// Returns addr advanced past the ModRM-related bytes consumed.
+static uintptr_t geted_32(dynarec_la464_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, int* l, int i12)
+{
+    MAYUSE(dyn);
+    MAYUSE(ninst);
+
+    // lock: 0 = no lock tracking, 1 = record lock addresses, 2 = query and report through *l
+    int lock = l ? ((l == LOCK_LOCK) ? 1 : 2) : 0;
+    if (lock == 2)
+        *l = 0;
+    uint8_t ret = x2;
+    *fixaddress = 0;
+    if (hint > 0) ret = hint;
+    int maxval = 2047;
+    if (i12 > 1)
+        maxval -= i12; // i12 > 1 shrinks the largest displacement left to the caller
+    MAYUSE(scratch);
+    if (!(nextop & 0xC0)) { // mod == 0
+        if ((nextop & 7) == 4) { // SIB byte follows
+            uint8_t sib = F8;
+            int sib_reg = (sib >> 3) & 0x7;
+            int sib_reg2 = sib & 0x7;
+            if (sib_reg2 == 5) { // no base register: 32-bit displacement instead
+                int64_t tmp = F32S;
+                if (sib_reg != 4) {
+                    if (tmp && ((tmp < -2048) || (tmp > maxval) || !i12)) {
+                        MOV32w(scratch, tmp);
+                        if ((sib >> 6)) {
+                            SLLI_D(ret, TO_LA464(sib_reg), sib >> 6);
+                            ADD_W(ret, ret, scratch);
+                        } else {
+                            ADD_W(ret, TO_LA464(sib_reg), scratch);
+                        }
+                    } else {
+                        if (sib >> 6) {
+                            SLLI_D(ret, TO_LA464(sib_reg), (sib >> 6));
+                        } else {
+                            ret = TO_LA464(sib_reg);
+                        }
+                        *fixaddress = tmp;
+                    }
+                } else { // no index either: absolute address
+                    switch (lock) {
+                        case 1: addLockAddress((int32_t)tmp); break;
+                        case 2:
+                            if (isLockAddress((int32_t)tmp)) *l = 1;
+                            break;
+                    }
+                    MOV32w(ret, tmp);
+                }
+            } else {
+                if (sib_reg != 4) {
+                    if ((sib >> 6)) {
+                        SLLI_D(ret, TO_LA464(sib_reg), (sib >> 6));
+                        ADD_W(ret, ret, TO_LA464(sib_reg2));
+                    } else {
+                        ADD_W(ret, TO_LA464(sib_reg2), TO_LA464(sib_reg));
+                    }
+                } else {
+                    ret = TO_LA464(sib_reg2);
+                }
+            }
+        } else if ((nextop & 7) == 5) { // absolute 32-bit address (no RIP-relative form here)
+            uint32_t tmp = F32;
+            MOV32w(ret, tmp);
+            switch (lock) {
+                case 1: addLockAddress(tmp); break;
+                case 2:
+                    if (isLockAddress(tmp)) *l = 1;
+                    break;
+            }
+        } else { // plain register indirect
+            ret = TO_LA464((nextop & 7));
+            if (ret == hint) {
+                MOV32w(x2, 0xffffffff);
+                AND(hint, ret, x2); // to clear upper part
+            }
+        }
+    } else { // displacement follows: 32 bits when bit 7 of nextop is set, 8 bits otherwise
+        int64_t i32;
+        uint8_t sib = 0;
+        int sib_reg = 0;
+        if ((nextop & 7) == 4) {
+            sib = F8;
+            sib_reg = (sib >> 3) & 7;
+        }
+        int sib_reg2 = sib & 0x07;
+        if (nextop & 0x80)
+            i32 = F32S;
+        else
+            i32 = F8S;
+        if (i32 == 0 || ((i32 >= -2048) && (i32 <= 2047) && i12)) {
+            // displacement is left to the caller through *fixaddress
+            *fixaddress = i32;
+            if ((nextop & 7) == 4) {
+                if (sib_reg != 4) {
+                    if (sib >> 6) {
+                        SLLI_D(ret, TO_LA464(sib_reg), (sib >> 6));
+                        ADD_W(ret, ret, TO_LA464(sib_reg2));
+                    } else {
+                        ADD_W(ret, TO_LA464(sib_reg2), TO_LA464(sib_reg));
+                    }
+                } else {
+                    ret = TO_LA464(sib_reg2);
+                }
+            } else {
+                ret = TO_LA464((nextop & 0x07));
+            }
+        } else {
+            if (i32 >= -2048 && i32 <= 2047) {
+                // small displacement: added with a single ADDI_W
+                if ((nextop & 7) == 4) {
+                    if (sib_reg != 4) {
+                        if (sib >> 6) {
+                            SLLI_D(scratch, TO_LA464(sib_reg), sib >> 6);
+                            ADD_W(scratch, scratch, TO_LA464(sib_reg2));
+                        } else {
+                            ADD_W(scratch, TO_LA464(sib_reg2), TO_LA464(sib_reg));
+                        }
+                    } else {
+                        scratch = TO_LA464(sib_reg2);
+                    }
+                } else {
+                    scratch = TO_LA464((nextop & 0x07));
+                }
+                ADDI_W(ret, scratch, i32);
+            } else {
+                // large displacement: loaded in scratch first
+                MOV32w(scratch, i32);
+                if ((nextop & 7) == 4) {
+                    if (sib_reg != 4) {
+                        ADD_W(scratch, scratch, TO_LA464(sib_reg2));
+                        if (sib >> 6) {
+                            SLLI_D(ret, TO_LA464(sib_reg), (sib >> 6));
+                            ADD_W(ret, ret, scratch);
+                        } else {
+                            ADD_W(ret, scratch, TO_LA464(sib_reg));
+                        }
+                    } else {
+                        PASS3(int tmp = TO_LA464(sib_reg2));
+                        ADD_W(ret, tmp, scratch);
+                    }
+                } else {
+                    PASS3(int tmp = TO_LA464((nextop & 0x07)));
+                    ADD_W(ret, tmp, scratch);
+                }
+            }
+        }
+    }
+    *ed = ret;
+    return addr;
+}
+
+// Emit the code that leaves the block through la464_epilog.
+// xRIP is set from `reg` when reg is non-zero (no move if reg is already xRIP), otherwise from the
+// constant `ip`; then the address of la464_epilog is loaded in x2 and branched to.
+void jump_to_epilog(dynarec_la464_t* dyn, uintptr_t ip, int reg, int ninst)
+{
+    MAYUSE(dyn);
+    MAYUSE(ip);
+    MAYUSE(ninst);
+    MESSAGE(LOG_DUMP, "Jump to epilog\n");
+
+    if (reg) {
+        if (reg != xRIP) {
+            MV(xRIP, reg);
+        }
+    } else {
+        GETIP_(ip);
+    }
+    TABLE64(x2, (uintptr_t)la464_epilog);
+    SMEND();
+    BR(x2);
+}
+
+// Emit the code that jumps to the native code associated with the next x64 address.
+// reg != 0: the target address is only known at run time (in `reg`); it is copied to xRIP and the
+//           jump table is walked level by level (one extra top level when not in 32-bit mode),
+//           indexing each level with a slice of xRIP, to load the native entry point in x2.
+// reg == 0: the target is the constant `ip`; the address of its jump-table slot is obtained now with
+//           getJumpTableAddress64() and a single load fetches the entry point.
+// In both cases x1 receives the x64 address and the code ends with a JIRL through x2 that links in xRA.
+void jump_to_next(dynarec_la464_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits)
+{
+    MAYUSE(dyn);
+    MAYUSE(ninst);
+    MESSAGE(LOG_DUMP, "Jump to next\n");
+
+    if (reg) {
+        if (reg != xRIP) {
+            MV(xRIP, reg);
+        }
+        uintptr_t tbl = is32bits ? getJumpTable32() : getJumpTable64();
+        MAYUSE(tbl);
+        TABLE64(x3, tbl);
+        if (!is32bits) {
+            // top level, only present for 64-bit addresses
+            SRLI_D(x2, xRIP, JMPTABL_START3);
+            ALSL_D(x3, x2, x3, 2);
+            LD_D(x3, x3, 0); // could be LR_D(x3, x3, 1, 1); for better safety
+        }
+        // level 2: the shift is reduced by 3 and the mask shifted by 3 so x2 is directly a byte offset
+        MOV64x(x4, JMPTABLE_MASK2 << 3); // x4 = mask
+        SRLI_D(x2, xRIP, JMPTABL_START2 - 3);
+        AND(x2, x2, x4);
+        ADD_D(x3, x3, x2);
+        LD_D(x3, x3, 0); // LR_D(x3, x3, 1, 1);
+        // level 1: x4 is reloaded only when the mask differs from the previous level
+        if (JMPTABLE_MASK2 != JMPTABLE_MASK1) {
+            MOV64x(x4, JMPTABLE_MASK1 << 3); // x4 = mask
+        }
+        SRLI_D(x2, xRIP, JMPTABL_START1 - 3);
+        AND(x2, x2, x4);
+        ADD_D(x3, x3, x2);
+        LD_D(x3, x3, 0); // LR_D(x3, x3, 1, 1);
+        // level 0: low bits of xRIP; ANDI is used when the mask is below 2048, else the mask goes through x4
+        if (JMPTABLE_MASK0 < 2048) {
+            ANDI(x2, xRIP, JMPTABLE_MASK0);
+        } else {
+            if (JMPTABLE_MASK1 != JMPTABLE_MASK0) {
+                MOV64x(x4, JMPTABLE_MASK0); // x4 = mask
+            }
+            AND(x2, xRIP, x4);
+        }
+        ALSL_D(x3, x2, x3, 2);
+        LD_D(x2, x3, 0); // LR_D(x2, x3, 1, 1);
+    } else {
+        uintptr_t p = getJumpTableAddress64(ip);
+        MAYUSE(p);
+        TABLE64(x3, p);
+        GETIP_(ip);
+        LD_D(x2, x3, 0); // LR_D(x2, x3, 1, 1);
+    }
+    if (reg != x1) {
+        MV(x1, xRIP);
+    }
+    CLEARIP();
+#ifdef HAVE_TRACE
+// MOVx(x3, 15);    no access to PC reg
+#endif
+    SMEND();
+    JIRL(xRA, x2, 0x0); // save LR...
+}
+
+// Not implemented yet for LA464: the body is empty, so nothing is emitted and all arguments are ignored.
+void call_c(dynarec_la464_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg)
+{
+    // TODO
+}
+
+// Not implemented yet for LA464: the body is empty, so nothing is emitted and all arguments are ignored.
+void fpu_purgecache(dynarec_la464_t* dyn, int ninst, int next, int s1, int s2, int s3)
+{
+    // TODO
+}
+
+// Not implemented yet for LA464: the body is empty, so nothing is emitted and all arguments are ignored.
+void fpu_reflectcache(dynarec_la464_t* dyn, int ninst, int s1, int s2, int s3)
+{
+    // TODO
+}
+
+// Not implemented yet for LA464: the body is empty, so nothing is emitted and all arguments are ignored.
+void fpu_unreflectcache(dynarec_la464_t* dyn, int ninst, int s1, int s2, int s3)
+{
+    // TODO
+}
+
+// Not implemented yet for LA464: the body is empty, dyn, ninst and reset_n are ignored.
+void fpu_reset_cache(dynarec_la464_t* dyn, int ninst, int reset_n)
+{
+    // TODO
+}
+
+// Propagate the ST stack state, especially the stack pops that are deferred.
+// Not implemented yet for LA464: the body is empty, dyn and ninst are ignored.
+void fpu_propagate_stack(dynarec_la464_t* dyn, int ninst)
+{
+    // TODO
+}
diff --git a/src/dynarec/la464/dynarec_la464_helper.h b/src/dynarec/la464/dynarec_la464_helper.h
new file mode 100644
index 00000000..4563a2ab
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_helper.h
@@ -0,0 +1,246 @@
+#ifndef __DYNAREC_LA464_HELPER_H__
+#define __DYNAREC_LA464_HELPER_H__
+
+// undef to get Close to SSE Float->int conversions
+// #define PRECISE_CVT
+
+#if STEP == 0
+#include "dynarec_la464_pass0.h"
+#elif STEP == 1
+#include "dynarec_la464_pass1.h"
+#elif STEP == 2
+#include "dynarec_la464_pass2.h"
+#elif STEP == 3
+#include "dynarec_la464_pass3.h"
+#endif
+
+#include "debug.h"
+#include "la464_emitter.h"
+#include "../emu/x64primop.h"
+
+#define F8      *(uint8_t*)(addr++)                // fetch 1 byte at addr, then advance addr
+#define F8S     *(int8_t*)(addr++)                 // same, signed
+#define F16     *(uint16_t*)(addr += 2, addr - 2)  // fetch 2 bytes at addr, then advance addr
+#define F16S    *(int16_t*)(addr += 2, addr - 2)   // same, signed
+#define F32     *(uint32_t*)(addr += 4, addr - 4)  // fetch 4 bytes at addr, then advance addr
+#define F32S    *(int32_t*)(addr += 4, addr - 4)   // same, signed
+#define F32S64  (uint64_t)(int64_t) F32S           // fetch signed 4 bytes, sign-extended to 64bits
+#define F64     *(uint64_t*)(addr += 8, addr - 8)  // fetch 8 bytes at addr, then advance addr
+#define PK(a)   *(uint8_t*)(addr + (a))            // peek 1 byte at addr+a, addr is not changed
+#define PK16(a) *(uint16_t*)(addr + (a))           // peek 2 bytes at addr+a
+#define PK32(a) *(uint32_t*)(addr + (a))           // peek 4 bytes at addr+a
+#define PK64(a) *(uint64_t*)(addr + (a))           // peek 8 bytes at addr+a
+#define PKip(a) *(uint8_t*)(ip + (a))              // peek 1 byte at ip+a
+
+// Strong mem emulation helpers
+#define SMREAD_MIN  2
+#define SMWRITE_MIN 1
+// Sequence of Read will trigger a DMB on "first" read if strongmem is > SMREAD_MIN
+// Sequence of Write will trigger a DMB on "last" write if strongmem is >= 1
+// All Write operations that might use a lock have a memory barrier if strongmem is > SMWRITE_MIN
+// Opcode will read: barrier on the first read of a sequence if strongmem > SMREAD_MIN, else only flag the read
+#define SMREAD()                                                            \
+    do {                                                                    \
+        if ((dyn->smread == 0) && (box64_dynarec_strongmem > SMREAD_MIN)) { \
+            SMDMB();                                                        \
+        } else { dyn->smread = 1; }                                         \
+    } while (0)
+// Opcode will read with option forced lock
+#define SMREADLOCK(lock) \
+    if ((lock) || ((dyn->smread == 0) && (box64_dynarec_strongmem > SMREAD_MIN))) { SMDMB(); }
+// Opcode might read (depend on nextop)
+#define SMMIGHTREAD() \
+    if (!MODREG) { SMREAD(); }
+// Opcode has written
+#define SMWRITE() dyn->smwrite = 1
+// Opcode has written (strongmem > SMREAD_MIN only)
+#define SMWRITE2() \
+    if (box64_dynarec_strongmem > SMREAD_MIN) dyn->smwrite = 1
+// Opcode has written with option forced lock
+#define SMWRITELOCK(lock)                                  \
+    if (lock || (box64_dynarec_strongmem > SMWRITE_MIN)) { \
+        SMDMB();                                           \
+    } else                                                 \
+        dyn->smwrite = 1
+// Opcode might have written (depend on nextop)
+#define SMMIGHTWRITE() \
+    if (!MODREG) { SMWRITE(); }
+// Start of sequence
+#define SMSTART() SMEND()
+// End of sequence
+#define SMEND()                                               \
+    if (dyn->smwrite && box64_dynarec_strongmem) { DBAR(0); } \
+    dyn->smwrite = 0;                                         \
+    dyn->smread = 0;
+// Force a Data memory barrier (for LOCK: prefix)
+#define SMDMB()       \
+    DBAR(0);          \
+    dyn->smwrite = 0; \
+    dyn->smread = 1
+
+// LOCK_* define
+#define LOCK_LOCK (int*)1
+
+// GETGD    get x64 register in gd
+#define GETGD                                                        \
+    do {                                                             \
+        gd = TO_LA464(((nextop & 0x38) >> 3) + (rex.r << 3)); \
+    } while (0);
+
+// CALL will use x7 for the call address. Return value can be put in ret (unless ret is -1)
+// R0 will not be pushed/popd if ret is -2
+#define CALL(F, ret) call_c(dyn, ninst, F, x7, ret, 1, 0)
+// CALL_ will use x7 for the call address. Return value can be put in ret (unless ret is -1)
+// R0 will not be pushed/popd if ret is -2
+#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x7, ret, 1, reg)
+// CALL_S will use x7 for the call address. Return value can be put in ret (unless ret is -1)
+// R0 will not be pushed/popd if ret is -2. Flags are not save/restored
+#define CALL_S(F, ret) call_c(dyn, ninst, F, x7, ret, 0, 0)
+
+#define MARKi(i)    dyn->insts[ninst].mark[i] = dyn->native_size
+#define GETMARKi(i) dyn->insts[ninst].mark[i]
+#define MARK        MARKi(0)
+#define GETMARK     GETMARKi(0)
+#define MARK2       MARKi(1)
+#define GETMARK2    GETMARKi(1)
+#define MARK3       MARKi(2)
+#define GETMARK3    GETMARKi(2)
+
+#define MARKFi(i)    dyn->insts[ninst].markf[i] = dyn->native_size
+#define GETMARKFi(i) dyn->insts[ninst].markf[i]
+#define MARKF        MARKFi(0)
+#define GETMARKF     GETMARKFi(0)
+#define MARKF2       MARKFi(1)
+#define GETMARKF2    GETMARKFi(1)
+
+#define MARKSEG     dyn->insts[ninst].markseg = dyn->native_size
+#define GETMARKSEG  dyn->insts[ninst].markseg
+#define MARKLOCK    dyn->insts[ninst].marklock = dyn->native_size
+#define GETMARKLOCK dyn->insts[ninst].marklock
+
+#ifndef READFLAGS
+#define READFLAGS(A)
+
+#endif
+
+#ifndef BARRIER
+#define BARRIER(A)
+#endif
+#ifndef DEFAULT
+#define DEFAULT \
+    *ok = -1;   \
+    BARRIER(2)
+#endif
+
+#ifndef TABLE64
+#define TABLE64(A, V)
+#endif
+
+#define ARCH_INIT()
+
+#if STEP < 2
+#define GETIP(A)
+#define GETIP_(A)
+#else
+// put value in the Table64 even if not using it for now to avoid difference between Step2 and Step3. Needs to be optimized later...
+#define GETIP(A)                                     \
+    if (dyn->last_ip && ((A)-dyn->last_ip) < 2048) { \
+        uint64_t _delta_ip = (A)-dyn->last_ip;       \
+        dyn->last_ip += _delta_ip;                   \
+        if (_delta_ip) {                             \
+            ADDI_D(xRIP, xRIP, _delta_ip);           \
+        }                                            \
+    } else {                                         \
+        dyn->last_ip = (A);                          \
+        if (dyn->last_ip < 0xffffffff) {             \
+            MOV64x(xRIP, dyn->last_ip);              \
+        } else                                       \
+            TABLE64(xRIP, dyn->last_ip);             \
+    }
+#define GETIP_(A)                                         \
+    if (dyn->last_ip && ((A)-dyn->last_ip) < 2048) {      \
+        int64_t _delta_ip = (A)-dyn->last_ip;             \
+        if (_delta_ip) { ADDI_D(xRIP, xRIP, _delta_ip); } \
+    } else {                                              \
+        if ((A) < 0xffffffff) {                           \
+            MOV64x(xRIP, (A));                            \
+        } else                                            \
+            TABLE64(xRIP, (A));                           \
+    }
+#endif
+#define CLEARIP() dyn->last_ip = 0
+
+#define MODREG ((nextop & 0xC0) == 0xC0)
+
+void la464_epilog(void);
+void* la464_next(x64emu_t* emu, uintptr_t addr);
+
+#ifndef STEPNAME
+#define STEPNAME3(N, M) N##M
+#define STEPNAME2(N, M) STEPNAME3(N, M)
+#define STEPNAME(N)     STEPNAME2(N, STEP)
+#endif
+
+#define native_pass STEPNAME(native_pass)
+
+#define dynarec64_00 STEPNAME(dynarec64_00)
+
+#define geted          STEPNAME(geted)
+#define geted32        STEPNAME(geted32)
+#define jump_to_epilog STEPNAME(jump_to_epilog)
+#define jump_to_next   STEPNAME(jump_to_next)
+#define call_c         STEPNAME(call_c)
+
+#define fpu_reset_cache     STEPNAME(fpu_reset_cache)
+#define fpu_propagate_stack STEPNAME(fpu_propagate_stack)
+#define fpu_purgecache      STEPNAME(fpu_purgecache)
+#define fpu_reflectcache    STEPNAME(fpu_reflectcache)
+#define fpu_unreflectcache  STEPNAME(fpu_unreflectcache)
+
+/* setup r2 to address pointed by */
+uintptr_t geted(dynarec_la464_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta);
+
+/* setup r2 to address pointed by */
+uintptr_t geted32(dynarec_la464_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta);
+
+// generic x64 helper
+void jump_to_epilog(dynarec_la464_t* dyn, uintptr_t ip, int reg, int ninst);
+void jump_to_next(dynarec_la464_t* dyn, uintptr_t ip, int reg, int ninst, int is32bits);
+void call_c(dynarec_la464_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
+
+// reset the cache with n
+void fpu_reset_cache(dynarec_la464_t* dyn, int ninst, int reset_n);
+// propagate stack state
+void fpu_propagate_stack(dynarec_la464_t* dyn, int ninst);
+// purge the FPU cache (needs 3 scratch registers)
+void fpu_purgecache(dynarec_la464_t* dyn, int ninst, int next, int s1, int s2, int s3);
+void fpu_reflectcache(dynarec_la464_t* dyn, int ninst, int s1, int s2, int s3);
+void fpu_unreflectcache(dynarec_la464_t* dyn, int ninst, int s1, int s2, int s3);
+
+uintptr_t dynarec64_00(dynarec_la464_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+
+#if STEP < 3
+#define PASS3(A)
+#else
+#define PASS3(A) A
+#endif
+
+#if STEP < 3
+#define MAYUSE(A) (void)A
+#else
+#define MAYUSE(A)
+#endif
+
+#define NOTEST(s1)                                       \
+    if (box64_dynarec_test) {                            \
+        ST_W(xZR, xEmu, offsetof(x64emu_t, test.test));  \
+        ST_W(xZR, xEmu, offsetof(x64emu_t, test.clean)); \
+    }
+
+#define GOTEST(s1, s2)                                 \
+    if (box64_dynarec_test) {                          \
+        MOV32w(s2, 1);                                 \
+        ST_W(s2, xEmu, offsetof(x64emu_t, test.test)); \
+    }
+
+#endif //__DYNAREC_LA464_HELPER_H__
\ No newline at end of file
diff --git a/src/dynarec/la464/dynarec_la464_jmpnext.c b/src/dynarec/la464/dynarec_la464_jmpnext.c
new file mode 100644
index 00000000..5a798e02
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_jmpnext.c
@@ -0,0 +1,13 @@
+#include <stdint.h>
+
+#include "la464_emitter.h"
+
+#define EMIT(A) do { *block = (A); ++block; } while (0)
+void CreateJmpNext(void* addr, void* next)
+{
+    uint32_t* block = (uint32_t*)addr;
+    uintptr_t diff = (intptr_t)next - (intptr_t)addr;
+    PCADDU12I(x2, SPLIT20(diff)); // x2 = addr + upper 20bits part of the offset to next
+    LD_D(x2, x2, SPLIT12(diff));  // x2 = 64bits value stored at next
+    BR(x2);                       // jump to the loaded address
+}
\ No newline at end of file
diff --git a/src/dynarec/la464/dynarec_la464_pass0.h b/src/dynarec/la464/dynarec_la464_pass0.h
new file mode 100644
index 00000000..3f6a68ce
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_pass0.h
@@ -0,0 +1,47 @@
+#define INIT uintptr_t sav_addr = addr
+#define FINI                           \
+    dyn->isize = addr - sav_addr;      \
+    dyn->insts[ninst].x64.addr = addr; \
+    if (ninst) dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr
+
+#define MESSAGE(A, ...)
+#define READFLAGS(A)                     \
+    dyn->insts[ninst].x64.use_flags = A; \
+    dyn->f.dfnone = 1;                   \
+    dyn->f.pending = SF_SET
+#define EMIT(A) dyn->native_size += 4
+#define BARRIER(A)                                 \
+    if (A != BARRIER_MAYBE) {                      \
+        fpu_purgecache(dyn, ninst, 0, x1, x2, x3); \
+        dyn->insts[ninst].x64.barrier = A;         \
+    } else                                         \
+        dyn->insts[ninst].barrier_maybe = 1
+#define NEW_INST                                                                                                  \
+    ++dyn->size;                                                                                                  \
+    if (dyn->size + 3 >= dyn->cap) {                                                                              \
+        dyn->insts = (instruction_native_t*)dynaRealloc(dyn->insts, sizeof(instruction_native_t) * dyn->cap * 2); \
+        memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_native_t) * dyn->cap);                                \
+        dyn->cap *= 2;                                                                                            \
+    }                                                                                                             \
+    dyn->insts[ninst].x64.addr = ip;                                                                              \
+    dyn->insts[ninst].f_entry = dyn->f;                                                                           \
+    if (ninst) { dyn->insts[ninst - 1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst - 1].x64.addr; }
+#define INST_EPILOG                    \
+    dyn->insts[ninst].f_exit = dyn->f; \
+    dyn->insts[ninst].x64.has_next = (ok > 0) ? 1 : 0;
+#define INST_NAME(name)
+#define DEFAULT                                                                                                                                     \
+    --dyn->size;                                                                                                                                    \
+    *ok = -1;                                                                                                                                       \
+    if (box64_dynarec_log >= LOG_INFO || box64_dynarec_dump || box64_dynarec_missing) {                                                             \
+        dynarec_log(LOG_NONE, "%p: Dynarec stopped because of %sOpcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \
+            (void*)ip, rex.is32bits ? "32bits " : "",                                                                                               \
+            PKip(0),                                                                                                                                \
+            PKip(1), PKip(2), PKip(3),                                                                                                              \
+            PKip(4), PKip(5), PKip(6),                                                                                                              \
+            PKip(7), PKip(8), PKip(9),                                                                                                              \
+            PKip(10), PKip(11), PKip(12),                                                                                                           \
+            PKip(13), PKip(14));                                                                                                                    \
+        printFunctionAddr(ip, " => ");                                                                                                              \
+        dynarec_log(LOG_NONE, "\n");                                                                                                                \
+    }
diff --git a/src/dynarec/la464/dynarec_la464_pass1.h b/src/dynarec/la464/dynarec_la464_pass1.h
new file mode 100644
index 00000000..7d616b22
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_pass1.h
@@ -0,0 +1,11 @@
+#define INIT
+#define FINI
+#define MESSAGE(A, ...)
+#define EMIT(A)
+#define NEW_INST \
+    dyn->insts[ninst].f_entry = dyn->f;
+
+#define INST_EPILOG \
+    dyn->insts[ninst].f_exit = dyn->f
+
+#define INST_NAME(name)
\ No newline at end of file
diff --git a/src/dynarec/la464/dynarec_la464_pass2.h b/src/dynarec/la464/dynarec_la464_pass2.h
new file mode 100644
index 00000000..df7d7809
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_pass2.h
@@ -0,0 +1,26 @@
+#define INIT dyn->native_size = 0
+#define FINI                                                                                                                                                   \
+    if (ninst) {                                                                                                                                               \
+        dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size);                                                              \
+        dyn->insts_size += 1 + ((dyn->insts[ninst].x64.size > (dyn->insts[ninst].size / 4)) ? dyn->insts[ninst].x64.size : (dyn->insts[ninst].size / 4)) / 15; \
+    }
+
+#define MESSAGE(A, ...)
+#define EMIT(A)                      \
+    do {                             \
+        dyn->insts[ninst].size += 4; \
+        dyn->native_size += 4;       \
+    } while (0)
+#define NEW_INST                                                                                                                                                               \
+    if (ninst) {                                                                                                                                                               \
+        dyn->insts[ninst].address = (dyn->insts[ninst - 1].address + dyn->insts[ninst - 1].size);                                                                              \
+        dyn->insts_size += 1 + ((dyn->insts[ninst - 1].x64.size > (dyn->insts[ninst - 1].size / 4)) ? dyn->insts[ninst - 1].x64.size : (dyn->insts[ninst - 1].size / 4)) / 15; \
+    }
+#define INST_EPILOG dyn->insts[ninst].epilog = dyn->native_size;
+#define INST_NAME(name)
+#define TABLE64(A, V)         \
+    {                         \
+        Table64(dyn, (V), 2); \
+        EMIT(0);              \
+        EMIT(0);              \
+    }
diff --git a/src/dynarec/la464/dynarec_la464_pass3.h b/src/dynarec/la464/dynarec_la464_pass3.h
new file mode 100644
index 00000000..ecaf8a77
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_pass3.h
@@ -0,0 +1,29 @@
+#define INIT
+#define FINI                                                                                              \
+    if (ninst)                                                                                            \
+        addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst].x64.size, dyn->insts[ninst].size / 4); \
+    addInst(dyn->instsize, &dyn->insts_size, 0, 0);
+#define EMIT(A)                                                          \
+    do {                                                                 \
+        if (box64_dynarec_dump) print_opcode(dyn, ninst, (uint32_t)(A)); \
+        if ((uintptr_t)dyn->block < dyn->tablestart)                     \
+            *(uint32_t*)(dyn->block) = (uint32_t)(A);                    \
+        dyn->block += 4;                                                 \
+        dyn->native_size += 4;                                           \
+        dyn->insts[ninst].size2 += 4;                                    \
+    } while (0)
+
+#define MESSAGE(A, ...) \
+    if (box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
+#define NEW_INST \
+    if (ninst)   \
+        addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst - 1].x64.size, dyn->insts[ninst - 1].size / 4);
+#define INST_EPILOG
+#define INST_NAME(name) inst_name_pass3(dyn, ninst, name, rex)
+#define TABLE64(A, V)                                 \
+    {                                                 \
+        int val64offset = Table64(dyn, (V), 3);       \
+        MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); \
+        PCADDU12I(A, SPLIT20(val64offset));           \
+        LD_D(A, A, SPLIT12(val64offset));             \
+    }
diff --git a/src/dynarec/la464/dynarec_la464_private.h b/src/dynarec/la464/dynarec_la464_private.h
new file mode 100644
index 00000000..89b248b0
--- /dev/null
+++ b/src/dynarec/la464/dynarec_la464_private.h
@@ -0,0 +1,88 @@
+#ifndef __DYNAREC_LA464_PRIVATE_H_
+#define __DYNAREC_LA464_PRIVATE_H_
+
+#include "../dynarec_private.h"
+
+typedef struct x64emu_s x64emu_t;
+typedef struct dynablock_s dynablock_t;
+typedef struct instsize_s instsize_t;
+
+#define BARRIER_MAYBE   8
+
+typedef struct flagcache_s {
+    int                 pending;    // is there a pending flags here, or to check?
+    int                 dfnone;     // if deferred flags is already set to df_none
+} flagcache_t;
+
+typedef struct instruction_la464_s {
+    instruction_x64_t   x64;
+    uintptr_t           address;    // (start) address of the native emitted instruction
+    uintptr_t           epilog;     // epilog of current instruction (can be start of next, or barrier stuff)
+    int                 size;       // size of the native emitted instruction (counted by EMIT in pass2)
+    int                 size2;      // size of the native emitted instruction (counted by EMIT in pass3)
+    int                 pred_sz;    // size of predecessor list
+    int                 *pred;      // predecessor array
+    uintptr_t           mark[3];    // native_size recorded by MARK / MARK2 / MARK3
+    uintptr_t           markf[2];   // native_size recorded by MARKF / MARKF2
+    uintptr_t           markseg;    // native_size recorded by MARKSEG
+    uintptr_t           marklock;   // native_size recorded by MARKLOCK
+    int                 pass2choice;// value for choices that are fixed on pass2 for pass3
+    uintptr_t           natcall;
+    int                 retn;
+    uint8_t             barrier_maybe; // set by BARRIER(BARRIER_MAYBE) in pass0
+    uint8_t             will_write;
+    uint8_t             last_write;
+    flagcache_t         f_exit;     // flags status at end of instruction
+    flagcache_t         f_entry;    // flags status before the instruction begins
+} instruction_la464_t;
+
+typedef struct dynarec_la464_s {
+    instruction_la464_t* insts;
+    int32_t              size;
+    int32_t              cap;
+    uintptr_t            start;      // start of the block
+    uint32_t             isize;      // size in bytes of x64 instructions included
+    void*                block;      // memory pointer where next instruction is emitted
+    uintptr_t            native_start;  // start of the native code
+    size_t               native_size;   // size of emitted native code
+    uintptr_t            last_ip;    // last set IP in RIP (or 0 if unclean state, see CLEARIP) TODO: move to a cache something
+    uint64_t*            table64;    // table of 64bits values
+    int                  table64size;// size of table (will be appended at end of executable code)
+    int                  table64cap;
+    uintptr_t            tablestart;
+    uintptr_t            jmp_next;   // address of the jump_next address
+    flagcache_t          f;
+    uintptr_t*           next;       // variable array of "next" jump address
+    int                  next_sz;
+    int                  next_cap;
+    int*                 jmps;       // variable array of jump instructions
+    int                  jmp_sz;
+    int                  jmp_cap;
+    int*                 predecessor;// single array of all predecessor
+    dynablock_t*         dynablock;
+    instsize_t*          instsize;
+    size_t               insts_size; // size of the instruction size array (calculated)
+    uintptr_t            forward;    // address of the last end of code while testing forward
+    uintptr_t            forward_to; // address of the next jump to (to check if everything is ok)
+    int32_t              forward_size;   // size at the forward point
+    int                  forward_ninst;  // ninst at the forward point
+    uint8_t              smread;    // for strongmem model emulation (a read was seen in the current sequence)
+    uint8_t              smwrite;    // for strongmem model emulation (a write is pending a barrier, see SMEND)
+    uint8_t              always_test;
+    uint8_t              abort;
+} dynarec_la464_t;
+
+void add_next(dynarec_la464_t *dyn, uintptr_t addr);
+uintptr_t get_closest_next(dynarec_la464_t *dyn, uintptr_t addr);
+void add_jump(dynarec_la464_t *dyn, int ninst);
+int get_first_jump(dynarec_la464_t *dyn, int next);
+int is_nops(dynarec_la464_t *dyn, uintptr_t addr, int n);
+int is_instructions(dynarec_la464_t *dyn, uintptr_t addr, int n);
+
+int Table64(dynarec_la464_t *dyn, uint64_t val, int pass);  // add a value to table64 (if needed) and gives back the offset that TABLE64 splits between PCADDU12I and LD_D
+
+void CreateJmpNext(void* addr, void* next);
+
+#define GO_TRACE(A, B, s0)  \
+
+#endif //__DYNAREC_LA464_PRIVATE_H_
diff --git a/src/dynarec/la464/la464_emitter.h b/src/dynarec/la464/la464_emitter.h
new file mode 100644
index 00000000..43cd81c1
--- /dev/null
+++ b/src/dynarec/la464/la464_emitter.h
@@ -0,0 +1,369 @@
+#ifndef __LA464_EMITTER_H__
+#define __LA464_EMITTER_H__
+/*
+    LA464 Emitter
+*/
+
+// LA464 ABI
+/*
+Name     Alias     Meaning                         saver
+---------------------------------------------------------
+r0       zero      Zero register                   -
+r1       ra        Return address                  Callee
+r2       tp        Thread pointer                  -
+r3       sp        Stack pointer                   Callee
+r4-r5    a0-a1     Function arguments,Return val.  Caller
+r6-r11   a2-a7     Function arguments              Caller
+r12-r20  t0-t8     Temp registers                  Caller
+r21      Reserved  Non-allocatable                 -
+r22      fp/s9     Frame pointer/Static register   Callee
+r23-31   s0-s8     Static registers                Callee
+---------------------------------------------------------
+f0-f1    fa0-fa1   Function arguments,Return val.  Caller
+f2-f7    fa2-fa7   Function arguments              Caller
+f8-f23   ft0-ft15  Temp registers                  Caller
+f24-f31  fs0-fs7   Static registers                Callee
+*/
+/*
+ LA464 GPR mapping
+ There are no 15 registers free, so the regs are split in 2 parts
+ AX..DI : r12-r19
+ R8..R15: r23-r30
+ flags in r31
+ ip in r20
+*/
+// x86 Register mapping
+#define xRAX    12
+#define xRCX    13
+#define xRDX    14
+#define xRBX    15
+#define xRSP    16
+#define xRBP    17
+#define xRSI    18
+#define xRDI    19
+#define xR8     23
+#define xR9     24
+#define xR10    25
+#define xR11    26
+#define xR12    27
+#define xR13    28
+#define xR14    29
+#define xR15    30
+#define xFlags  31
+#define xRIP    20
+// function to move from x86 regs number to LA464 reg number (fully parenthesized: safe inside larger expressions)
+#define TO_LA464(A) (((A)>7)?((A)+15):((A)+12))
+// function to move from LA464 regs number to x86 reg number (fully parenthesized: safe inside larger expressions)
+#define FROM_LA464(A) (((A)>22)?((A)-15):((A)-12))
+// 32bits version
+#define wEAX    xRAX
+#define wECX    xRCX
+#define wEDX    xRDX
+#define wEBX    xRBX
+#define wESP    xRSP
+#define wEBP    xRBP
+#define wESI    xRSI
+#define wEDI    xRDI
+#define wR8     xR8
+#define wR9     xR9
+#define wR10    xR10
+#define wR11    xR11
+#define wR12    xR12
+#define wR13    xR13
+#define wR14    xR14
+#define wR15    xR15
+#define wFlags  xFlags
+// scratch registers
+#define x1      5
+#define x2      6
+#define x3      7
+#define x4      8
+#define x5      9
+#define x6      10
+// 32bits version of scratch
+#define w1      x1
+#define w2      x2
+#define w3      x3
+#define w4      x4
+#define w5      x5
+#define w6      x6
+// emu is r4 (a0)
+#define xEmu    4
+// LA464 RA
+#define xRA     1
+#define ra      xRA
+// LA464 SP
+#define xSP     3
+// xZR regs
+#define xZR     0
+#define wZR     xZR
+#define r0      xZR
+
+// split a 32bits value in 20bits + 12bits, adjust the upper part if the 12bits part is negative
+#define SPLIT20(A) (((A) + 0x800) >> 12)
+#define SPLIT12(A) ((A) & 0xfff)
+
+// ZERO the upper part of r; x2 is used as scratch for the mask, so x2 is clobbered
+#define ZEROUP(r)               \
+    do {                        \
+        MOV32w(x2, 0xffffffff); \
+        AND(r, r, x2);          \
+    } while (0);
+
+#define type_4R(opc, ra, rk, rj, rd)     ((opc) << 20 | (ra) << 15 | (rk) << 10 | (rj) << 5 | (rd))
+#define type_3R(opc, rk, rj, rd)         ((opc) << 15 | (rk) << 10 | (rj) << 5 | (rd))
+#define type_3RI2(opc, imm2, rk, rj, rd) ((opc) << 17 | ((imm2) & 0x3) << 15 | (rk) << 10 | (rj) << 5 | (rd))
+#define type_2R(opc, rj, rd)             ((opc) << 10 | (rj) << 5 | (rd))
+#define type_2RI5(opc, imm5, rj, rd)     ((opc) << 15 | ((imm5) & 0x1F) << 10 | (rj) << 5 | (rd))
+#define type_2RI6(opc, imm6, rj, rd)     ((opc) << 16 | ((imm6) & 0x3F) << 10 | (rj) << 5 | (rd))
+#define type_2RI8(opc, imm8, rj, rd)     ((opc) << 18 | ((imm8) & 0xFF) << 10 | (rj) << 5 | (rd))
+#define type_2RI12(opc, imm12, rj, rd)   ((opc) << 22 | ((imm12) & 0xFFF) << 10 | (rj) << 5 | (rd))
+#define type_2RI14(opc, imm14, rj, rd)   ((opc) << 24 | ((imm14) & 0x3FFF) << 10 | (rj) << 5 | (rd))
+#define type_2RI16(opc, imm16, rj, rd)   ((opc) << 26 | ((imm16) & 0xFFFF) << 10 | (rj) << 5 | (rd))
+#define type_1RI20(opc, imm20, rd)       ((opc) << 25 | ((imm20) & 0xFFFFF) << 5 | (rd))
+#define type_1RI21(opc, imm21, rj)       ((opc) << 26 | ((imm21) & 0xFFFF) << 10 | (rj) << 5 | ((imm21) & 0x1F0000) >> 16)
+#define type_hint(opc, imm15)            ((opc) << 15 | ((imm15) & 0x7FFF))
+#define type_I26(opc, imm26)             ((opc) << 26 | ((imm26) & 0xFFFF) << 10 | (((imm26) >> 16) & 0x3FF)) // offs[15:0] in bits 25..10, offs[25:16] in bits 9..0
+
+// tmp = GR[rj][31:0] + GR[rk][31:0]
+// Gr[rd] = SignExtend(tmp[31:0], GRLEN)
+#define ADD_W(rd, rj, rk) EMIT(type_3R(0b00000000000100000, rk, rj, rd))
+// tmp = GR[rj][31:0] - GR[rk][31:0]
+// Gr[rd] = SignExtend(tmp[31:0], GRLEN)
+#define SUB_W(rd, rj, rk) EMIT(type_3R(0b00000000000100010, rk, rj, rd))
+// tmp = GR[rj][63:0] + GR[rk][63:0]
+// Gr[rd] = tmp[63:0]
+#define ADD_D(rd, rj, rk) EMIT(type_3R(0b00000000000100001, rk, rj, rd))
+// tmp = GR[rj][63:0] - GR[rk][63:0]
+// Gr[rd] = tmp[63:0]
+#define SUB_D(rd, rj, rk) EMIT(type_3R(0b00000000000100011, rk, rj, rd))
+
+// tmp = GR[rj][31:0] + SignExtend(imm12, 32)
+// GR[rd] = SignExtend(tmp[31:0], GRLEN)
+#define ADDI_W(rd, rj, imm12) EMIT(type_2RI12(0b0000001010, imm12, rj, rd))
+// tmp = GR[rj][63:0] + SignExtend(imm12, 64)
+// GR[rd] = tmp[63:0]
+#define ADDI_D(rd, rj, imm12) EMIT(type_2RI12(0b0000001011, imm12, rj, rd))
+// tmp = GR[rj][63:0] + SignExtend({imm16, 16'b0}, 64)
+// GR[rd] = tmp[63:0]
+#define ADDU16I_D(rd, rj, imm16) EMIT(type_2RI16(0b000100, imm16, rj, rd))
+
+// tmp = (GR[rj][31:0] << (imm2 + 1)) + GR[rk][31:0]
+// GR[rd] = SignExtend(tmp[31:0], GRLEN)
+#define ALSL_W(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000000010, imm2, rk, rj, rd))
+// tmp = (GR[rj][31:0] << (imm2 + 1)) + GR[rk][31:0]
+// GR[rd] = ZeroExtend(tmp[31:0], GRLEN)
+#define ALSL_WU(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000000011, imm2, rk, rj, rd))
+// tmp = (GR[rj][63:0] << (imm2 + 1)) + GR[rk][63:0]
+// GR[rd] = tmp[63:0]
+#define ALSL_D(rd, rj, rk, imm2) EMIT(type_3RI2(0b000000000010110, imm2, rk, rj, rd))
+
+// GR[rd] = SignExtend({imm20, 12'b0}, GRLEN)
+#define LU12I_W(rd, imm20) EMIT(type_1RI20(0b0001010, imm20, rd))
+// GR[rd] = {SignExtend(imm20, 32), GR[rd][31:0]}
+#define LU32I_D(rd, imm20) EMIT(type_1RI20(0b0001011, imm20, rd))
+// GR[rd] = {imm12, GR[rj][51:0]}
+#define LU52I_D(rd, rj, imm12) EMIT(type_2RI12(0b0000001100, imm12, rj, rd))
+
+// GR[rd] = PC + SignExtend({imm20, 2'b0}, GRLEN)
+#define PCADDI(rd, imm20) EMIT(type_1RI20(0b0001100, imm20, rd))
+// GR[rd] = PC + SignExtend({imm20, 12'b0}, GRLEN)
+#define PCADDU12I(rd, imm20) EMIT(type_1RI20(0b0001110, imm20, rd))
+// GR[rd] = PC + SignExtend({imm20, 18'b0}, GRLEN)
+#define PCADDU18I(rd, imm20) EMIT(type_1RI20(0b0001111, imm20, rd))
+// tmp = PC + SignExtend({imm20, 12'b0}, GRLEN)
+// GR[rd] = {tmp[GRLEN-1:12], 12'b0}
+#define PCALAU12I(rd, imm20) EMIT(type_1RI20(0b0001101, imm20, rd))
+
+// GR[rd] = GR[rj] & GR[rk] (bitwise AND, register operands)
+#define AND(rd, rj, rk) EMIT(type_3R(0b00000000000101001, rk, rj, rd))
+// GR[rd] = GR[rj] | GR[rk] (bitwise OR)
+#define OR(rd, rj, rk) EMIT(type_3R(0b00000000000101010, rk, rj, rd))
+// GR[rd] = ~(GR[rj] | GR[rk]) (bitwise NOR)
+#define NOR(rd, rj, rk) EMIT(type_3R(0b00000000000101000, rk, rj, rd))
+// GR[rd] = GR[rj] ^ GR[rk] (bitwise XOR)
+#define XOR(rd, rj, rk) EMIT(type_3R(0b00000000000101011, rk, rj, rd))
+// GR[rd] = GR[rj] & (~GR[rk]) (AND with the complement of rk)
+#define ANDN(rd, rj, rk) EMIT(type_3R(0b00000000000101101, rk, rj, rd))
+// GR[rd] = GR[rj] | (~GR[rk]) (OR with the complement of rk)
+#define ORN(rd, rj, rk) EMIT(type_3R(0b00000000000101100, rk, rj, rd))
+
+// GR[rd] = GR[rj] & ZeroExtend(imm12, GRLEN) (the immediate is zero-extended, not sign-extended)
+#define ANDI(rd, rj, imm12) EMIT(type_2RI12(0b0000001101, imm12, rj, rd))
+// GR[rd] = GR[rj] | ZeroExtend(imm12, GRLEN) (the immediate is zero-extended, not sign-extended)
+#define ORI(rd, rj, imm12) EMIT(type_2RI12(0b0000001110, imm12, rj, rd))
+// GR[rd] = GR[rj] ^ ZeroExtend(imm12, GRLEN) (the immediate is zero-extended, not sign-extended)
+#define XORI(rd, rj, imm12) EMIT(type_2RI12(0b0000001111, imm12, rj, rd))
+
+// GR[rd] = SLL(GR[rj][63:0], imm6) (Shift Left Logical, 64-bit)
+#define SLLI_D(rd, rj, imm6) EMIT(type_2RI6(0b0000000001000001, imm6, rj, rd))
+// GR[rd] = SRL(GR[rj][63:0], imm6) (Shift Right Logical, 64-bit)
+#define SRLI_D(rd, rj, imm6) EMIT(type_2RI6(0b0000000001000101, imm6, rj, rd))
+// GR[rd] = SRA(GR[rj][63:0], imm6) (Shift Right Arithmetic, 64-bit)
+#define SRAI_D(rd, rj, imm6) EMIT(type_2RI6(0b0000000001001001, imm6, rj, rd))
+// GR[rd] = ROTR(GR[rj][63:0], imm6) (Rotate To Right, 64-bit)
+#define ROTRI_D(rd, rj, imm6) EMIT(type_2RI6(0b0000000001001101, imm6, rj, rd))
+
+// rd = rs1 + (rs2 << imm6); scratch is overwritten with the shifted value when imm6 != 0 (pseudo instruction, 1 or 2 opcodes)
+#define ADDSL(rd, rs1, rs2, imm6, scratch) \
+    if (!(imm6)) {                         \
+        ADD_D(rd, rs1, rs2);               \
+    } else {                               \
+        SLLI_D(scratch, rs2, imm6);        \
+        ADD_D(rd, rs1, scratch);           \
+    }
+
+// if GR[rj] == 0:
+//     PC = PC + SignExtend({imm21, 2'b0}, GRLEN) (the macro argument is the byte offset; it is shifted right by 2 before encoding)
+#define BEQZ(rj, imm21) EMIT(type_1RI21(0b010000, (imm21) >> 2, rj))
+// if GR[rj] != 0:
+//     PC = PC + SignExtend({imm21, 2'b0}, GRLEN) (the macro argument is the byte offset; it is shifted right by 2 before encoding)
+#define BNEZ(rj, imm21) EMIT(type_1RI21(0b010001, (imm21) >> 2, rj))
+
+// GR[rd] = PC + 4
+// PC = GR[rj] + SignExtend({imm16, 2'b0}, GRLEN) (unlike BEQZ/BNEZ, imm16 is passed to the encoder unshifted)
+#define JIRL(rd, rj, imm16) EMIT(type_2RI16(0b010011, imm16, rj, rd))
+// PC = GR[rj] (pseudo instruction: JIRL that discards the link value into xZR)
+#define BR(rj) JIRL(xZR, rj, 0x0)
+
+// Load signed byte: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// byte = MemoryLoad(paddr, BYTE)
+// GR[rd] = SignExtend(byte, GRLEN)
+#define LD_B(rd, rj, imm12) EMIT(type_2RI12(0b0010100000, imm12, rj, rd))
+// Load signed halfword: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// halfword = MemoryLoad(paddr, HALFWORD)
+// GR[rd] = SignExtend(halfword, GRLEN)
+#define LD_H(rd, rj, imm12) EMIT(type_2RI12(0b0010100001, imm12, rj, rd))
+// Load signed word: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// word = MemoryLoad(paddr, WORD)
+// GR[rd] = SignExtend(word, GRLEN)
+#define LD_W(rd, rj, imm12) EMIT(type_2RI12(0b0010100010, imm12, rj, rd))
+// Load doubleword: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// GR[rd] = MemoryLoad(paddr, DOUBLEWORD)
+#define LD_D(rd, rj, imm12) EMIT(type_2RI12(0b0010100011, imm12, rj, rd))
+// Load unsigned byte: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// byte = MemoryLoad(paddr, BYTE)
+// GR[rd] = ZeroExtend(byte, GRLEN)
+#define LD_BU(rd, rj, imm12) EMIT(type_2RI12(0b0010101000, imm12, rj, rd))
+// Load unsigned halfword: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// halfword = MemoryLoad(paddr, HALFWORD)
+// GR[rd] = ZeroExtend(halfword, GRLEN)
+#define LD_HU(rd, rj, imm12) EMIT(type_2RI12(0b0010101001, imm12, rj, rd))
+// Load unsigned word: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// word = MemoryLoad(paddr, WORD)
+// GR[rd] = ZeroExtend(word, GRLEN)
+#define LD_WU(rd, rj, imm12) EMIT(type_2RI12(0b0010101010, imm12, rj, rd))
+
+// Store byte: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// MemoryStore(GR[rd][7:0], paddr, BYTE)
+#define ST_B(rd, rj, imm12) EMIT(type_2RI12(0b0010100100, imm12, rj, rd))
+// Store halfword: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// MemoryStore(GR[rd][15:0], paddr, HALFWORD)
+#define ST_H(rd, rj, imm12) EMIT(type_2RI12(0b0010100101, imm12, rj, rd))
+// Store word: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// MemoryStore(GR[rd][31:0], paddr, WORD)
+#define ST_W(rd, rj, imm12) EMIT(type_2RI12(0b0010100110, imm12, rj, rd))
+// Store doubleword: vaddr = GR[rj] + SignExtend(imm12, GRLEN)
+// AddressComplianceCheck(vaddr)
+// paddr = AddressTranslation(vaddr)
+// MemoryStore(GR[rd][63:0], paddr, DOUBLEWORD)
+#define ST_D(rd, rj, imm12) EMIT(type_2RI12(0b0010100111, imm12, rj, rd))
+
+// GR[rd] = ZeroExtend(imm32, GRLEN) (pseudo instruction, 1 to 3 opcodes)
+// LU12I.W sign-extends bit 31 of its result into bits 63:32, so when bit 31
+// of imm32 is set an extra LU32I_D(rd, 0) clears the upper half again.
+// Without it MOV32w(rd, 0xffffffff) would load all-ones instead of a 32-bit mask.
+#define MOV32w(rd, imm32)                              \
+    if (((uint32_t)(imm32)) > 0xfffu) {                \
+        LU12I_W(rd, ((uint32_t)(imm32)) >> 12);        \
+        ORI(rd, rd, ((uint32_t)(imm32)) & 0xfffu);     \
+        if (((uint32_t)(imm32)) & 0x80000000u) {       \
+            LU32I_D(rd, 0);                            \
+        }                                              \
+    } else {                                           \
+        ORI(rd, xZR, ((uint32_t)(imm32)) & 0xfffu);    \
+    }
+// GR[rd] = imm64 (pseudo instruction)
+// Values that fit in 32 bits go through MOV32w (zero-extended); larger values
+// load the low word first and then overwrite bits 51:32 and 63:52, so the
+// sign-extension done by LU12I.W does not matter on that path.
+// The whole expansion is a single if/else statement.
+#define MOV64x(rd, imm64)                                        \
+    if (((uint64_t)(imm64)) > 0xffffffffu) {                     \
+        if (((uint32_t)(imm64)) > 0xfffu) {                      \
+            LU12I_W(rd, ((uint32_t)(imm64)) >> 12);              \
+            ORI(rd, rd, ((uint32_t)(imm64)) & 0xfffu);           \
+        } else {                                                 \
+            ORI(rd, xZR, ((uint32_t)(imm64)) & 0xfffu);          \
+        }                                                        \
+        LU32I_D(rd, (((uint64_t)(imm64)) >> 32) & 0xfffffu);     \
+        LU52I_D(rd, rd, ((uint64_t)(imm64)) >> 52);              \
+    } else {                                                     \
+        MOV32w(rd, imm64);                                       \
+    }
+
+// rd[63:0] = rj[63:0] (pseudo instruction)
+#define MV(rd, rj) ADDI_D(rd, rj, 0)
+// rd = rj (pseudo instruction)
+// With rex.w set this is a plain 64-bit move; otherwise the low 32 bits of rj
+// are copied and bits 63:32 of rd are cleared. The zero-extension is done with
+// a shift pair so it needs no scratch register and no 32-bit mask constant.
+#define MVxw(rd, rj)            \
+    if (rex.w) {                \
+        MV(rd, rj);             \
+    } else {                    \
+        SLLI_D(rd, rj, 32);     \
+        SRLI_D(rd, rd, 32);     \
+    }
+// rd = rj (pseudo instruction)
+// Same as MVxw but keyed on rex.is32bits: zero-extend the low 32 bits when
+// the guest runs in 32-bit mode, plain 64-bit move otherwise.
+#define MVz(rd, rj)             \
+    if (rex.is32bits) {         \
+        SLLI_D(rd, rj, 32);     \
+        SRLI_D(rd, rd, 32);     \
+    } else {                    \
+        MV(rd, rj);             \
+    }
+
+// PUSH / POP reg[0:63] on the guest stack (xRSP). NOTE(review): the ';' after 'while (0)' makes 'if (c) PUSH1(r); else ...' ill-formed - confirm no caller needs that form
+#define PUSH1(reg)              \
+    do {                        \
+        ST_D(reg, xRSP, -8);    \
+        ADDI_D(xRSP, xRSP, -8); \
+    } while (0);
+#define POP1(reg)                               \
+    do {                                        \
+        LD_D(reg, xRSP, 0);                     \
+        if (reg != xRSP) ADDI_D(xRSP, xRSP, 8); /* popping into xRSP keeps the loaded value */ \
+    } while (0);
+
+// PUSH / POP reg[0:31]: 4-byte slot, xRSP adjusted with the 32-bit ADDI_W, pop is zero-extended (LD_WU)
+#define PUSH1_32(reg)           \
+    do {                        \
+        ST_W(reg, xRSP, -4);    \
+        ADDI_W(xRSP, xRSP, -4); \
+    } while (0);
+#define POP1_32(reg)                            \
+    do {                                        \
+        LD_WU(reg, xRSP, 0);                    \
+        if (reg != xRSP) ADDI_W(xRSP, xRSP, 4); /* popping into xRSP keeps the loaded value */ \
+    } while (0);
+
+// POP reg, using the 32-bit form when rex.is32bits is set and the 64-bit form otherwise
+#define POP1z(reg)      \
+    if (rex.is32bits) { \
+        POP1_32(reg);   \
+    } else {            \
+        POP1(reg);      \
+    }
+// PUSH reg, using the 32-bit form when rex.is32bits is set and the 64-bit form otherwise
+#define PUSH1z(reg)     \
+    if (rex.is32bits) { \
+        PUSH1_32(reg);  \
+    } else {            \
+        PUSH1(reg);     \
+    }
+
+// DBAR hint (memory barrier; hint is encoded verbatim in the opcode)
+#define DBAR(hint) EMIT(type_hint(0b00111000011100100, hint))
+
+#endif //__ARM64_EMITTER_H__
\ No newline at end of file
diff --git a/src/dynarec/la464/la464_epilog.S b/src/dynarec/la464/la464_epilog.S
new file mode 100644
index 00000000..41eae0cc
--- /dev/null
+++ b/src/dynarec/la464/la464_epilog.S
@@ -0,0 +1,55 @@
+//la464 epilog for dynarec
+//Write the guest registers back to emu, then restore the host stack and callee-saved registers
+//called with pointer to emu as 1st parameter
+//(no 2nd parameter is read here)
+
+.text
+.align 4
+
+.global la464_epilog
+la464_epilog:
+    //update register -> emu
+    st.d   $r12, $r4, (8 * 0)
+    st.d   $r13, $r4, (8 * 1)
+    st.d   $r14, $r4, (8 * 2)
+    st.d   $r15, $r4, (8 * 3)
+    st.d   $r16, $r4, (8 * 4)
+    st.d   $r17, $r4, (8 * 5)
+    st.d   $r18, $r4, (8 * 6)
+    st.d   $r19, $r4, (8 * 7)
+    st.d   $r23, $r4, (8 * 8)
+    st.d   $r24, $r4, (8 * 9)
+    st.d   $r25, $r4, (8 * 10)
+    st.d   $r26, $r4, (8 * 11)
+    st.d   $r27, $r4, (8 * 12)
+    st.d   $r28, $r4, (8 * 13)
+    st.d   $r29, $r4, (8 * 14)
+    st.d   $r30, $r4, (8 * 15)
+    st.d   $r31, $r4, (8 * 16) // xFlags
+    st.d   $r20, $r4, (8 * 17) // xRIP: put the current guest IP back in emu (r20 must hold it at this point)
+    ld.d   $sp,  $r4, 552      // restore saved sp from emu->xSPSave, see la464_prolog
+    ld.d   $r11, $sp, -8       // NOTE(review): la464_prolog stores the previous xSPSave at saved_sp-16 and zero at saved_sp-8; this reads saved_sp-8 - confirm the intended slot
+    st.d   $r11, $r4, 552      // write that value back to emu->xSPSave
+    // restore the registers pushed by la464_prolog (same frame layout)
+    ld.d   $r1,  $sp, (8 * 0) // load ra
+    ld.d   $r22, $sp, (8 * 1) // load fp
+    ld.d   $r23, $sp, (8 * 2)
+    ld.d   $r24, $sp, (8 * 3)
+    ld.d   $r25, $sp, (8 * 4)
+    ld.d   $r26, $sp, (8 * 5)
+    ld.d   $r27, $sp, (8 * 6)
+    ld.d   $r28, $sp, (8 * 7)
+    ld.d   $r29, $sp, (8 * 8)
+    ld.d   $r30, $sp, (8 * 9)
+    ld.d   $r31, $sp, (8 * 10)
+    fld.d  $f24, $sp, (8 * 11)
+    fld.d  $f25, $sp, (8 * 12)
+    fld.d  $f26, $sp, (8 * 13)
+    fld.d  $f27, $sp, (8 * 14)
+    fld.d  $f28, $sp, (8 * 15)
+    fld.d  $f29, $sp, (8 * 16)
+    fld.d  $f30, $sp, (8 * 17)
+    fld.d  $f31, $sp, (8 * 18)
+    addi.d $sp,  $sp, (8 * 19) // release the 19-slot frame
+    // end, return
+    ret
diff --git a/src/dynarec/la464/la464_lock.S b/src/dynarec/la464/la464_lock.S
new file mode 100644
index 00000000..9a728b14
--- /dev/null
+++ b/src/dynarec/la464/la464_lock.S
@@ -0,0 +1,186 @@
+// LA464 lock helper
+// there are 2 parts: read and write
+// write returns 0 on success, 1 on failure (value has been changed)
+
+.text
+.align 4
+
+.global la464_lock_xchg_dd
+.global la464_lock_xchg_d
+.global la464_lock_storeifnull
+.global la464_lock_storeifnull_d
+.global la464_lock_storeifref
+.global la464_lock_storeifref_d
+.global la464_lock_storeifref2_d
+.global la464_lock_decifnot0b
+.global la464_lock_storeb
+.global la464_lock_incif0
+.global la464_lock_decifnot0
+.global la464_lock_store
+.global la464_lock_store_dd
+.global la464_lock_get_b
+.global la464_lock_get_d
+.global la464_lock_get_dd
+.global la464_lock_cas_d
+.global la464_lock_cas_dd
+.global la464_lock_cas_dq
+
+la464_lock_xchg_dd:
+    // 64-bit exchange: address is a0, value is a1, return old value in a0
+    amswap_db.d $a2, $a1, $a0   // a2 = old [a0], [a0] = a1
+    move        $a0, $a2
+    ret
+
+la464_lock_xchg_d:
+    // 32-bit exchange: address is a0, value is a1, return old value in a0
+    amswap_db.w $a2, $a1, $a0   // a2 = old [a0], [a0] = a1
+    move        $a0, $a2
+    ret
+
+la464_lock_storeifnull:
+    // 64-bit: address is a0, value is a1, a1 is stored to [a0] only if [a0] is 0. return old [a0] value
+    dbar 0
+    move $a3, $a1      // work on a copy: the sc.d below reuses a3 as its status register (tested by beqz)
+    ll.d $a2, $a0, 0   // a2 = current [a0]
+    bnez $a2, 12       // not null: skip the store, jump to "move $a0, $a2"
+    sc.d $a3, $a0, 0
+    beqz $a3, -16      // store failed: retry from "move $a3, $a1"
+    move $a0, $a2
+    ret
+
+la464_lock_storeifnull_d:
+    // 32-bit: address is a0, value is a1, a1 is stored to [a0] only if [a0] is 0. return old [a0] value
+    dbar 0
+    move $a3, $a1      // work on a copy: the sc.w below reuses a3 as its status register (tested by beqz)
+    ll.w $a2, $a0, 0   // a2 = current [a0]
+    bnez $a2, 12       // not null: skip the store, jump to "move $a0, $a2"
+    sc.w $a3, $a0, 0
+    beqz $a3, -16      // store failed: retry from "move $a3, $a1"
+    move $a0, $a2
+    ret
+
+la464_lock_storeifref:
+    // 64-bit: address is a0, value is a1, a1 is stored to [a0] only if [a0] is a2. return new [a0] value (so a1 or old value)
+    dbar 0
+    move $a4, $a1      // work on a copy: sc.d reuses a4 as its status register
+    ll.d $a3, $a0, 0   // a3 = current [a0]
+    bne  $a2, $a3, 24  // [a0] != ref: jump to the second "dbar 0" and return the old value
+    sc.d $a4, $a0, 0
+    beqz $a4, -16      // store failed: retry from "move $a4, $a1"
+    dbar 0
+    move $a0, $a1      // stored: return val
+    ret
+    dbar 0
+    move $a0, $a3      // not stored: return the value found at [a0]
+    ret
+
+la464_lock_storeifref_d:
+    // 32-bit: address is a0, value is a1, a1 is stored to [a0] only if [a0] is a2. return new [a0] value (so a1 or old value)
+    dbar 0
+    move $a4, $a1      // work on a copy: sc.w reuses a4 as its status register
+    ll.w $a3, $a0, 0   // a3 = current [a0]
+    bne  $a2, $a3, 24  // [a0] != ref: jump to the second "dbar 0" and return the old value
+    sc.w $a4, $a0, 0
+    beqz $a4, -16      // store failed: retry from "move $a4, $a1"
+    dbar 0
+    move $a0, $a1      // stored: return val
+    ret
+    dbar 0
+    move $a0, $a3      // not stored: return the value found at [a0]
+    ret
+
+la464_lock_storeifref2_d:
+    // 32-bit: address is a0, value is a1, a1 is stored to [a0] only if [a0] is a2. return old [a0] value
+    dbar 0
+    move $a4, $a1      // work on a copy: sc.w reuses a4 as its status register
+    ll.w $a3, $a0, 0   // a3 = current [a0]
+    bne  $a2, $a3, 12  // [a0] != ref: skip the store, jump to "move $a0, $a3"
+    sc.w $a4, $a0, 0
+    beqz $a4, -16      // store failed: retry from "move $a4, $a1"
+    move $a0, $a3      // always return the value that was loaded from [a0]
+    ret
+
+la464_lock_decifnot0b:
+    dbar       0
+    // TODO: the byte decrement is not implemented yet, this only issues a barrier and returns
+    ret
+
+la464_lock_storeb:
+    st.b $a1, $a0, 0   // store the low byte of a1 to [a0]
+    dbar 0             // barrier after the store
+    ret
+
+la464_lock_decifnot0:
+    dbar   0
+    ll.w   $a1, $a0, 0   // a1 = current [a0] (32-bit)
+    beqz   $a1, 20       // already 0: nothing to do, jump to "move $a0, $a1" (returns 0)
+    addi.d $a1, $a1, -1
+    move   $a2, $a1      // copy, because sc.w reuses a2 as its status register
+    sc.w   $a2, $a0, 0
+    beqz   $a2, -20      // store failed: retry from the ll.w
+    move   $a0, $a1      // return the decremented value
+    ret
+
+la464_lock_incif0:
+    dbar   0
+    ll.w   $a1, $a0, 0   // a1 = current [a0] (32-bit)
+    bnez   $a1, 20       // not 0: nothing to do, jump to "move $a0, $a1" (returns the unchanged value)
+    addi.d $a1, $a1, 1
+    move   $a2, $a1      // copy, because sc.w reuses a2 as its status register
+    sc.w   $a2, $a0, 0
+    beqz   $a2, -20      // store failed: retry from the ll.w
+    move   $a0, $a1      // return the incremented value (1), not the old 0
+    ret
+
+la464_lock_store:
+    st.w $a1, $a0, 0   // 32-bit store of a1 to [a0]
+    dbar 0             // barrier after the store
+    ret
+
+la464_lock_store_dd:
+    st.d $a1, $a0, 0   // 64-bit store of a1 to [a0]
+    dbar 0             // barrier after the store
+    ret
+
+la464_lock_get_b:
+    dbar 0             // barrier before the load
+    ld.b $a0, $a0, 0   // NOTE(review): ld.b sign-extends while the C prototype returns uint32_t - confirm ld.bu is not wanted
+    ret
+
+la464_lock_get_d:
+    dbar 0             // barrier before the load
+    ld.w $a0, $a0, 0   // 32-bit load
+    ret
+
+la464_lock_get_dd:
+    dbar 0             // barrier before the load
+    ld.d $a0, $a0, 0   // 64-bit load
+    ret
+
+la464_lock_cas_d:
+    ll.w $a3, $a0, 0   // 32-bit CAS: address is a0, ref is a1, new value is a2
+    bne  $a3, $a1, 16  // [a0] != ref: jump to "li.d $a0, 1" (failure)
+    sc.w $a2, $a0, 0   // a2 becomes the store status
+    xori $a0, $a2, 1   // invert the status so that 0 means stored, 1 means not stored (no retry here)
+    ret
+    li.d $a0, 1
+    ret
+
+la464_lock_cas_dd:
+    ll.d $a3, $a0, 0   // 64-bit CAS: address is a0, ref is a1, new value is a2
+    bne  $a3, $a1, 16  // [a0] != ref: jump to "li.d $a0, 1" (failure)
+    sc.d $a2, $a0, 0   // a2 becomes the store status
+    xori $a0, $a2, 1   // invert the status so that 0 means stored, 1 means not stored (no retry here)
+    ret
+    li.d $a0, 1
+    ret
+
+la464_lock_cas_dq:
+    // address is a0, low 64-bit value is a1, reference for [a0] is a2, high 64-bit value is a3
+    // only the low quadword is compared and stored with ll/sc; the high one is a plain store ("mostly" atomic)
+    ll.d $a4, $a0, 0
+    bne  $a4, $a2, 20  // [a0] != ref: jump to "li.d $a0, 1" (failure)
+    sc.d $a1, $a0, 0   // a1 becomes the store status
+    st.d $a3, $a0, 8   // high quadword goes to [a0 + 8]; offset 0 would overwrite the low quadword just stored
+    xori $a0, $a1, 1   // 0 means stored, 1 means not stored
+    ret
+    li.d $a0, 1
+    ret
diff --git a/src/dynarec/la464/la464_lock.h b/src/dynarec/la464/la464_lock.h
new file mode 100644
index 00000000..8cff10d6
--- /dev/null
+++ b/src/dynarec/la464/la464_lock.h
@@ -0,0 +1,73 @@
+#ifndef __LA464_LOCK__H__
+#define __LA464_LOCK__H__
+#include <stdint.h>
+
+// Atomically exchange the 64-bit value at [p] with val, return the old [p]
+extern uintptr_t la464_lock_xchg_dd(void* p, uintptr_t val);
+
+// Atomically exchange the 32-bit value at [p] with val, return the old [p]
+extern uint32_t la464_lock_xchg_d(void* p, uint32_t val);
+
+// Atomically store the 32-bit value to [p] only if [p] is 0. Return old [p] value
+extern uint32_t la464_lock_storeifnull_d(void*p, uint32_t val);
+
+// Atomically store value to [p] only if [p] is NULL. Return old [p] value
+extern void* la464_lock_storeifnull(void*p, void* val);
+
+// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
+extern void* la464_lock_storeifref(void*p, void* val, void* ref);
+
+// Atomically store the 32-bit value to [p] only if [p] is ref. Return new [p] value (so val or old)
+extern uint32_t la464_lock_storeifref_d(void*p, uint32_t val, uint32_t ref);
+
+// Atomically store the 32-bit value to [p] only if [p] is ref. Return old [p] value (whether or not the store happened)
+extern uint32_t la464_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref);
+
+// decrement atomically the byte at [p] (but only if [p] is not 0). The assembly is currently a TODO stub that only issues a barrier
+extern void la464_lock_decifnot0b(void*p);
+
+// byte store followed by a memory barrier
+extern void la464_lock_storeb(void*p, uint8_t b);
+
+// increment atomically the int at [p] only if it was 0. Returns 1 when the increment happened, else the unchanged non-zero [p]
+extern int la464_lock_incif0(void*p);
+
+// decrement atomically the int at [p] (but only if [p] is not 0). Returns the decremented value, or 0 when [p] was already 0
+extern int la464_lock_decifnot0(void*p);
+
+// 32-bit store followed by a memory barrier
+extern void la464_lock_store(void*p, uint32_t v);
+
+// 64-bit store followed by a memory barrier
+extern void la464_lock_store_dd(void*p, uint64_t v);
+
+// byte load preceded by a memory barrier
+extern uint32_t la464_lock_get_b(void* p);
+
+// 32-bit load preceded by a memory barrier
+extern uint32_t la464_lock_get_d(void* p);
+
+// 64-bit load preceded by a memory barrier
+extern void* la464_lock_get_dd(void* p);
+
+// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
+extern int la464_lock_cas_d(void* p, int32_t ref, int32_t val);
+
+// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
+extern int la464_lock_cas_dd(void* p, int64_t ref, int64_t val);
+
+// (mostly) Atomically store two 64-bit values at [p] if old [p] matches. Return 0 if OK, 1 if not. p needs to be aligned. NOTE: the assembly compares [p] with the 3rd argument and stores the 2nd argument at [p], so the parameter names below do not match their roles
+extern int la464_lock_cas_dq(void* p, uint64_t ref, uint64_t val1, uint64_t val2);
+
+// Not defined in assembler but in C (dynarec_la464_functions)
+uint8_t extract_byte(uint32_t val, void* address);
+uint32_t insert_byte(uint32_t val, uint8_t b, void* address);
+uint16_t extract_half(uint32_t val, void* address);
+uint32_t insert_half(uint32_t val, uint16_t h, void* address);
+
+uint8_t la464_lock_xchg_b(void* addr, uint8_t v);
+uint16_t la464_lock_xchg_h(void* addr, uint16_t v);
+int la464_lock_cas_b(void* p, uint8_t ref, uint8_t val);
+int la464_lock_cas_h(void* p, uint16_t ref, uint16_t val);
+
+#endif  //__LA464_LOCK__H__
diff --git a/src/dynarec/la464/la464_next.S b/src/dynarec/la464/la464_next.S
new file mode 100644
index 00000000..e2c4924d
--- /dev/null
+++ b/src/dynarec/la464/la464_next.S
@@ -0,0 +1,52 @@
+//la464 update linker table for dynarec
+//called with pointer to emu as 1st parameter
+//and address of table to as 2nd parameter
+//guest ip is in r20 (xRIP, same mapping as la464_prolog / la464_epilog)
+
+.text
+.align 4
+
+.extern LinkNext
+
+.global la464_next
+
+    .8byte  0   // NULL pointer before la464_next, for getDB
+la464_next:
+    // emu is a0
+    // IP address is a1
+    // save the caller-saved registers that hold guest state across the C call
+    addi.d $sp, $sp, -(8 * 12)
+    st.d   $a0, $sp, 0
+    st.d   $a1, $sp, 8
+    st.d   $r12, $sp, 16
+    st.d   $r13, $sp, 24
+    st.d   $r14, $sp, 32
+    st.d   $r15, $sp, 40
+    st.d   $r16, $sp, 48
+    st.d   $r17, $sp, 56
+    st.d   $r18, $sp, 64
+    st.d   $r19, $sp, 72
+    st.d   $r20, $sp, 80 // r20 is xRIP: this slot may be rewritten by LinkNext
+    st.d   $r30, $sp, 88 // r30 is kept in the frame as before (12 slots)
+
+    move   $a2, $ra      // "from" is in ra, so put in a2
+    addi.d $a3, $sp, 80  // a3 is address to change rip (the saved r20, not the r30 slot)
+    // call the function
+    bl LinkNext
+    // preserve return value
+    move   $a3, $a0
+    // pop regs
+    ld.d   $a0, $sp, 0
+    ld.d   $a1, $sp, 8
+    ld.d   $r12, $sp, 16
+    ld.d   $r13, $sp, 24
+    ld.d   $r14, $sp, 32
+    ld.d   $r15, $sp, 40
+    ld.d   $r16, $sp, 48
+    ld.d   $r17, $sp, 56
+    ld.d   $r18, $sp, 64
+    ld.d   $r19, $sp, 72
+    ld.d   $r20, $sp, 80 // picks up the rip possibly updated by LinkNext
+    ld.d   $r30, $sp, 88
+    addi.d $sp,  $sp, (8 * 12)
+    // return offset is jump address
+    jr     $a3
\ No newline at end of file
diff --git a/src/dynarec/la464/la464_printer.c b/src/dynarec/la464/la464_printer.c
new file mode 100644
index 00000000..5651ec12
--- /dev/null
+++ b/src/dynarec/la464/la464_printer.c
@@ -0,0 +1,285 @@
+#include <stdint.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "la464_printer.h"
+#include "debug.h"
+
+// Display names for the 32 general registers, indexed by register number.
+// Guest registers follow the mapping used by la464_prolog/la464_epilog:
+// r12-r19 = RAX..RDI, r20 = RIP, r23-r30 = R8..R15, r31 = Flags, r4 = emu.
+static const char* Xt[] = {"xZR", "r1", "r2", "sp", "xEmu", "r5", "r6", "r7", "r8", "r9", "r10", "r11", "xRAX", "xRCX", "xRDX", "xRBX", "xRSP", "xRBP", "xRSI", "xRDI", "xRIP", "r21", "r22", "xR8", "xR9", "xR10", "xR11", "xR12", "xR13", "xR14", "xR15", "xFlags"};
+
+typedef struct la464_print_s {
+    int d, j, k, a; // register fields, filled by isMask from the mask letters 'd', 'j', 'k' and 'a'
+    int i, u;       // immediate fields, filled by isMask from the mask letters 'i' and 'u'
+} la464_print_t;
+
+// Match opcode against a 32-character mask, most significant bit first.
+// '0' and '1' are fixed bits that must match; the letters d, j, k, a, i, u
+// mark field bits that are accumulated (MSB first) into the same-named member of *a.
+// Returns 1 on a match (with *a filled in), 0 when a fixed bit differs or the
+// mask is not exactly 32 characters long. *a is zeroed before matching starts.
+int isMask(uint32_t opcode, const char* mask, la464_print_t *a)
+{
+    const size_t len = strlen(mask);
+    if(len!=32) {
+        // %zu is the conversion for size_t
+        printf_log(LOG_NONE, "Error: printer mask \"%s\" in not len 32 but %zu\n", mask, len);
+        return 0;
+    }
+    memset(a, 0, sizeof(*a));
+    int i = 31;
+    while(*mask) {
+        uint8_t v = (opcode>>i)&1;
+        switch(*mask) {
+            case '0': if(v!=0) return 0; break;
+            case '1': if(v!=1) return 0; break;
+            case 'd': a->d = (a->d<<1) | v; break;
+            case 'j': a->j = (a->j<<1) | v; break;
+            case 'k': a->k = (a->k<<1) | v; break;
+            case 'a': a->a = (a->a<<1) | v; break;
+            case 'i': a->i = (a->i<<1) | v; break;
+            case 'u': a->u = (a->u<<1) | v; break;
+            default:
+                // unknown letters are reported and then treated as "don't care"
+                printf_log(LOG_NONE, "Warning, printer mask use unhandled '%c'\n", *mask);
+                break;
+        }
+        mask++;
+        --i;
+    }
+    
+    return 1;
+}
+
+// Interpret the low sz bits of val as a two's complement number and widen it to 64 bits.
+// sz is expected to be in 1..32 (the callers pass 12, 16 or 21).
+int64_t signExtend(uint32_t val, int sz)
+{
+    const int negative = (int)((val >> (sz - 1)) & 1);
+    if (!negative)
+        return (int64_t)val;
+    // top bit of the field is set: fill every bit from sz upwards with ones
+    return (int64_t)((uint64_t)val | (0xffffffffffffffffll << sz));
+}
+
+// Disassemble one LoongArch opcode into a human readable string.
+// opcode: the 32-bit instruction word.
+// addr:   address of the instruction (currently unused).
+// Returns a pointer to a static buffer that is overwritten by the next call
+// (so the result must be consumed before calling again, and the function is
+// not reentrant). Unknown opcodes are rendered as "XXXXXXXX ???".
+const char* la464_print(uint32_t opcode, uintptr_t addr)
+{
+    static char buff[200];
+    la464_print_t a;
+    #define Rd a.d
+    #define Rj a.j
+    #define Rk a.k
+    #define Ra a.a
+    #define imm a.i
+    #define imm_up a.u
+    // ADD.W
+    if(isMask(opcode, "00000000000100000kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ADD.W %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // SUB.W
+    if(isMask(opcode, "00000000000100010kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SUB.W %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // ADD.D
+    if(isMask(opcode, "00000000000100001kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ADD.D %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // SUB.D
+    if(isMask(opcode, "00000000000100011kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SUB.D %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // ADDI.W (signExtend returns int64_t, so it is narrowed to match %d)
+    if(isMask(opcode, "0000001010iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ADDI.W %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // ADDI.D
+    if(isMask(opcode, "0000001011iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ADDI.D %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // ADDU16I.D (the immediate field is 16 bits wide)
+    if(isMask(opcode, "000100iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ADDU16I.D %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 16));
+        return buff;
+    }
+    // ALSL.W
+    if(isMask(opcode, "000000000000010iikkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ALSL.W %s, %s, %s, %d", Xt[Rd], Xt[Rj], Xt[Rk], imm);
+        return buff;
+    }
+    // ALSL.WU
+    if(isMask(opcode, "000000000000011iikkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ALSL.WU %s, %s, %s, %d", Xt[Rd], Xt[Rj], Xt[Rk], imm);
+        return buff;
+    }
+    // ALSL.D
+    if(isMask(opcode, "000000000010110iikkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ALSL.D %s, %s, %s, %d", Xt[Rd], Xt[Rj], Xt[Rk], imm);
+        return buff;
+    }
+    // LU12I.W
+    if(isMask(opcode, "0001010iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
+        snprintf(buff, sizeof(buff), "LU12I.W %s, %d", Xt[Rd], imm);
+        return buff;
+    }
+    // LU32I.D
+    if(isMask(opcode, "0001011iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
+        snprintf(buff, sizeof(buff), "LU32I.D %s, %d", Xt[Rd], imm);
+        return buff;
+    }
+    // LU52I.D
+    if(isMask(opcode, "0000001100iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "LU52I.D %s, %s, %d", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // PCADDI
+    if(isMask(opcode, "0001100iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
+        snprintf(buff, sizeof(buff), "PCADDI %s, %d", Xt[Rd], imm);
+        return buff;
+    }
+    // PCADDU12I (major opcode 0001110, same as the emitter)
+    if(isMask(opcode, "0001110iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
+        snprintf(buff, sizeof(buff), "PCADDU12I %s, %d", Xt[Rd], imm);
+        return buff;
+    }
+    // PCADDU18I (major opcode 0001111, same as the emitter)
+    if(isMask(opcode, "0001111iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
+        snprintf(buff, sizeof(buff), "PCADDU18I %s, %d", Xt[Rd], imm);
+        return buff;
+    }
+    // PCALAU12I (major opcode 0001101, same as the emitter)
+    if(isMask(opcode, "0001101iiiiiiiiiiiiiiiiiiiiddddd", &a)) {
+        snprintf(buff, sizeof(buff), "PCALAU12I %s, %d", Xt[Rd], imm);
+        return buff;
+    }
+    // AND
+    if(isMask(opcode, "00000000000101001kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "AND %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // OR
+    if(isMask(opcode, "00000000000101010kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "OR %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // NOR
+    if(isMask(opcode, "00000000000101000kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "NOR %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // XOR
+    if(isMask(opcode, "00000000000101011kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "XOR %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // ANDN
+    if(isMask(opcode, "00000000000101101kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ANDN %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // ORN
+    if(isMask(opcode, "00000000000101100kkkkkjjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ORN %s, %s, %s", Xt[Rd], Xt[Rj], Xt[Rk]);
+        return buff;
+    }
+    // ANDI
+    if(isMask(opcode, "0000001101iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ANDI %s, %s, 0x%x", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // ORI
+    if(isMask(opcode, "0000001110iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ORI %s, %s, 0x%x", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // XORI
+    if(isMask(opcode, "0000001111iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "XORI %s, %s, 0x%x", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // SLLI.D
+    if(isMask(opcode, "0000000001000001iiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SLLI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // SRLI.D
+    if(isMask(opcode, "0000000001000101iiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SRLI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // SRAI.D
+    if(isMask(opcode, "0000000001001001iiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "SRAI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // ROTRI.D
+    if(isMask(opcode, "0000000001001101iiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ROTRI.D %s, %s, %u", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // BEQZ (offs21 = {u, i}; shown as the signed byte offset, i.e. what the BEQZ emitter macro takes)
+    if(isMask(opcode, "010000iiiiiiiiiiiiiiiijjjjjuuuuu", &a)) {
+        snprintf(buff, sizeof(buff), "BEQZ %s, %d", Xt[Rj], (int)(signExtend(imm + (imm_up << 16), 21) * 4));
+        return buff;
+    }
+    // BNEZ (offs21 = {u, i}; shown as the signed byte offset, i.e. what the BNEZ emitter macro takes)
+    if(isMask(opcode, "010001iiiiiiiiiiiiiiiijjjjjuuuuu", &a)) {
+        snprintf(buff, sizeof(buff), "BNEZ %s, %d", Xt[Rj], (int)(signExtend(imm + (imm_up << 16), 21) * 4));
+        return buff;
+    }
+    // JIRL (raw 16-bit field, as passed to the JIRL emitter macro)
+    if(isMask(opcode, "010011iiiiiiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "JIRL %s, %s, %d", Xt[Rd], Xt[Rj], imm);
+        return buff;
+    }
+    // LD.B
+    if(isMask(opcode, "0010100000iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "LD.B %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // LD.H
+    if(isMask(opcode, "0010100001iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "LD.H %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // LD.W
+    if(isMask(opcode, "0010100010iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "LD.W %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // LD.D
+    if(isMask(opcode, "0010100011iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "LD.D %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // LD.BU
+    if(isMask(opcode, "0010101000iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "LD.BU %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // LD.HU
+    if(isMask(opcode, "0010101001iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "LD.HU %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // LD.WU
+    if(isMask(opcode, "0010101010iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "LD.WU %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // ST.B
+    if(isMask(opcode, "0010100100iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ST.B %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // ST.H
+    if(isMask(opcode, "0010100101iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ST.H %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // ST.W
+    if(isMask(opcode, "0010100110iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ST.W %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // ST.D
+    if(isMask(opcode, "0010100111iiiiiiiiiiiijjjjjddddd", &a)) {
+        snprintf(buff, sizeof(buff), "ST.D %s, %s, %d", Xt[Rd], Xt[Rj], (int)signExtend(imm, 12));
+        return buff;
+    }
+    // no pattern matched: dump the (byte-swapped) opcode
+    snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));
+    return buff;
+}
\ No newline at end of file
diff --git a/src/dynarec/la464/la464_printer.h b/src/dynarec/la464/la464_printer.h
new file mode 100644
index 00000000..e3ad8b2c
--- /dev/null
+++ b/src/dynarec/la464/la464_printer.h
@@ -0,0 +1,6 @@
+#ifndef _LA464_PRINTER_H_
+#define _LA464_PRINTER_H_
+
+#include <stdint.h> // uint32_t / uintptr_t, so the header can be included on its own
+
+// Disassemble one LoongArch opcode located at addr.
+// Returns a pointer to a static buffer owned by the printer (overwritten by the next call).
+// addr is declared uintptr_t to match the definition in la464_printer.c.
+const char* la464_print(uint32_t opcode, uintptr_t addr);
+
+#endif //_LA464_PRINTER_H_
diff --git a/src/dynarec/la464/la464_prolog.S b/src/dynarec/la464/la464_prolog.S
new file mode 100644
index 00000000..aafe4dc7
--- /dev/null
+++ b/src/dynarec/la464/la464_prolog.S
@@ -0,0 +1,60 @@
+//loongarch prologue for dynarec
+//Save the host callee-saved registers, load the guest registers from emu, then jump to the block
+//called with pointer to emu as 1st parameter
+//and address to jump to as 2nd parameter
+
+.text
+.align 4
+
+.global la464_prolog
+la464_prolog:
+    //save ra, fp, s0-s8 and fs0-fs7 (19 slots of 8 bytes)
+    addi.d $sp,  $sp, -(8 * 19)
+    st.d   $r1,  $sp, (8 * 0) //save ra
+    st.d   $r22, $sp, (8 * 1) //save fp
+    // save s0 - s8 (r23 - r31)
+    st.d   $r23, $sp, (8 * 2)
+    st.d   $r24, $sp, (8 * 3)
+    st.d   $r25, $sp, (8 * 4)
+    st.d   $r26, $sp, (8 * 5)
+    st.d   $r27, $sp, (8 * 6)
+    st.d   $r28, $sp, (8 * 7)
+    st.d   $r29, $sp, (8 * 8)
+    st.d   $r30, $sp, (8 * 9)
+    st.d   $r31, $sp, (8 * 10)
+    // save fs0 - fs7 (f24 - f31)
+    fst.d  $f24, $sp, (8 * 11)
+    fst.d  $f25, $sp, (8 * 12)
+    fst.d  $f26, $sp, (8 * 13)
+    fst.d  $f27, $sp, (8 * 14)
+    fst.d  $f28, $sp, (8 * 15)
+    fst.d  $f29, $sp, (8 * 16)
+    fst.d  $f30, $sp, (8 * 17)
+    fst.d  $f31, $sp, (8 * 18)
+    //setup emu -> register (emu slots 0-7 go to r12-r19, slots 8-15 to r23-r30)
+    ld.d   $r12, $r4, (8 * 0)
+    ld.d   $r13, $r4, (8 * 1)
+    ld.d   $r14, $r4, (8 * 2)
+    ld.d   $r15, $r4, (8 * 3)
+    ld.d   $r16, $r4, (8 * 4)
+    ld.d   $r17, $r4, (8 * 5)
+    ld.d   $r18, $r4, (8 * 6)
+    ld.d   $r19, $r4, (8 * 7)
+    ld.d   $r23, $r4, (8 * 8)
+    ld.d   $r24, $r4, (8 * 9)
+    ld.d   $r25, $r4, (8 * 10)
+    ld.d   $r26, $r4, (8 * 11)
+    ld.d   $r27, $r4, (8 * 12)
+    ld.d   $r28, $r4, (8 * 13)
+    ld.d   $r29, $r4, (8 * 14)
+    ld.d   $r30, $r4, (8 * 15)
+    ld.d   $r31, $r4, (8 * 16)  //xFlags
+    ld.d   $r20, $r4, (8 * 17)  //xRIP
+    ld.d   $r11, $r4, 552 // grab an old value of emu->xSPSave
+    st.d   $sp,  $r4, 552 // save current sp to emu->xSPSave
+    // push sentinel onto the stack: old xSPSave at the new sp, zero at new sp + 8
+    st.d   $r11, $sp, -16
+    st.d   $r0,  $sp, -8
+    addi.d $sp,  $sp, -16
+    //jump to function (no link register is written: the block leaves through la464_epilog)
+    jirl   $r0,  $a1, 0
diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h
index 70a8eaa8..66cb026f 100644
--- a/src/dynarec/native_lock.h
+++ b/src/dynarec/native_lock.h
@@ -77,6 +77,44 @@
 #define native_lock_get_d(A)                rv64_lock_get_d(A)

 #define native_lock_get_dd(A)               rv64_lock_get_dd(A)

 

+#elif defined(LA464)

+#include "la464/la464_lock.h"

+

+#define USE_CAS

+

+// no byte or 2-bytes atomic access on LA464 (the _b / _h variants are C helpers, see la464_lock.h)

+#define native_lock_xchg_dd(A, B)           la464_lock_xchg_dd(A, B)

+#define native_lock_xchg_d(A, B)            la464_lock_xchg_d(A, B)

+#define native_lock_xchg_h(A, B)            la464_lock_xchg_h(A, B)

+#define native_lock_xchg_b(A, B)            la464_lock_xchg_b(A, B)

+#define native_lock_storeifref(A, B, C)     la464_lock_storeifref(A, B, C)

+#define native_lock_storeifref_d(A, B, C)   la464_lock_storeifref_d(A, B, C)

+#define native_lock_storeifref2_d(A, B, C)  la464_lock_storeifref2_d(A, B, C)

+#define native_lock_storeifnull(A, B)       la464_lock_storeifnull(A, B)

+#define native_lock_storeifnull_d(A, B)     la464_lock_storeifnull_d(A, B)

+#define native_lock_decifnot0b(A)           la464_lock_decifnot0b(A)

+#define native_lock_storeb(A, B)            la464_lock_storeb(A, B)

+#define native_lock_incif0(A)               la464_lock_incif0(A)

+#define native_lock_decifnot0(A)            la464_lock_decifnot0(A)

+#define native_lock_store(A, B)             la464_lock_store(A, B)

+#define native_lock_store_dd(A, B)          la464_lock_store_dd(A, B)

+

+// there is no atomic move on 8 bits, so faking it: a plain read into tmpcas, then a compare-and-swap against it

+#define native_lock_read_b(A)               tmpcas=*(uint8_t*)(A)

+#define native_lock_write_b(A, B)           la464_lock_cas_b(A, tmpcas, B)

+// there is no atomic move on 16 bits, so faking it the same way

+#define native_lock_read_h(A)               tmpcas=*(uint16_t*)(A)

+#define native_lock_write_h(A, B)           la464_lock_cas_h(A, tmpcas, B)

+#define native_lock_read_d(A)               tmpcas=*(uint32_t*)(A)

+#define native_lock_write_d(A, B)           la464_lock_cas_d(A, tmpcas, B)

+#define native_lock_read_dd(A)              tmpcas=*(uint64_t*)(A)

+#define native_lock_write_dd(A, B)          la464_lock_cas_dd(A, tmpcas, B)

+#define native_lock_read_dq(A, B, C)        *A=tmpcas=((uint64_t*)(C))[0]; *B=((uint64_t*)(C))[1];

+#define native_lock_write_dq(A, B, C)       la464_lock_cas_dq(C, A, tmpcas, B)

+#define native_lock_get_b(A)                la464_lock_get_b(A)

+#define native_lock_get_d(A)                la464_lock_get_d(A)

+#define native_lock_get_dd(A)               la464_lock_get_dd(A)

+

 #else

 #error Unsupported architecture

 #endif

diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index 90c9b7b0..4548d7e9 100644
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -74,6 +74,9 @@ typedef struct x64emu_s {
     #ifdef RV64         // it would be better to use a dedicated register for this like arm64 xSavedSP, but we're running of of free registers.
     uintptr_t xSPSave;  // sp base value of current dynarec frame, used by call/ret optimization to reset stack when unmatch.
     #endif
+    #ifdef LA464         // it would be better to use a dedicated register for this like arm64 xSavedSP, but we're running out of free registers.
+    uintptr_t xSPSave;  // sp base value of current dynarec frame, used by call/ret optimization to reset the stack on a mismatch.
+    #endif
     fpu_ld_t    fpu_ld[8]; // for long double emulation / 80bits fld fst
     fpu_ll_t    fpu_ll[8]; // for 64bits fild / fist sequence
 	fpu_p_reg_t p_regs[8];
diff --git a/src/include/dynarec_la464.h b/src/include/dynarec_la464.h
new file mode 100644
index 00000000..2adb3e98
--- /dev/null
+++ b/src/include/dynarec_la464.h
@@ -0,0 +1,10 @@
+#ifndef __DYNAREC_LA464_H_
+#define __DYNAREC_LA464_H_
+
+typedef struct dynablock_s dynablock_t; // forward declaration only; the struct is used through a pointer below
+typedef struct x64emu_s x64emu_t; // forward declaration only; not referenced by the prototypes in this header
+
+void CancelBlock64(void); // NOTE(review): presumably abandons the dynarec block currently being built - confirm against the implementation
+void* FillBlock64(dynablock_t* block, uintptr_t addr); // NOTE(review): presumably generates native code for `block` from guest address `addr`; uintptr_t needs <stdint.h> from the includer since this header includes nothing
+
+#endif //__DYNAREC_LA464_H_
\ No newline at end of file
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index fac0f478..c3c1c572 100644
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -1684,6 +1684,21 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for
                 for (int i=-4; i<4; ++i) {
                     printf_log(log_minimum, "%sRSP%c0x%02x:0x%016lx", (i%4)?" ":"\n", i<0?'-':'+', abs(i)*8, *(uintptr_t*)(rsp+i*8));
                 }
+#elif defined(LA464)
+            if(db) {
+                shown_regs = 1;
+                for (int i=0; i<16; ++i) { // print 16 saved native registers, labelled with reg_name[], four per line
+                    if(!(i%4)) printf_log(log_minimum, "\n");
+                    printf_log(log_minimum, "%s:0x%016llx ", reg_name[i], p->uc_mcontext.__gregs[16+i]); // NOTE(review): assumes native registers 16..31 carry the values named by reg_name[0..15] - confirm against the LA464 register mapping
+                }
+                printf_log(log_minimum, "\n");
+                for (int i=0; i<6; ++i) // then the 6 entries of emu->segs, labelled with seg_name[]
+                    printf_log(log_minimum, "%s:0x%04x ", seg_name[i], emu->segs[i]);
+            }
+            if(rsp!=addr && getProtection((uintptr_t)rsp-4*8) && getProtection((uintptr_t)rsp+4*8)) // dump the stack only if rsp is not the faulting address and getProtection() is non-zero 32 bytes below and above it
+                for (int i=-4; i<4; ++i) { // 8 quadwords: rsp-0x20 up to rsp+0x18
+                    printf_log(log_minimum, "%sRSP%c0x%02x:0x%016lx", (i%4)?" ":"\n", i<0?'-':'+', abs(i)*8, *(uintptr_t*)(rsp+i*8));
+                }
 #else
             #warning TODO
 #endif