about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2021-03-14 17:58:04 +0100
committerptitSeb <sebastien.chev@gmail.com>2021-03-14 17:58:04 +0100
commit4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6 (patch)
treee89e9892fa166aa348b8c9f902de7428e875c7bc /src/dynarec
parent3dda84e58b148f92b2bb4d94caacc84011fa3919 (diff)
downloadbox64-4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6.tar.gz
box64-4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6.zip
[DYNAREC] Added Basic blocks for dynarec
Diffstat (limited to 'src/dynarec')
-rwxr-xr-xsrc/dynarec/arm64_epilog.S81
-rwxr-xr-xsrc/dynarec/arm64_lock_helper.S87
-rwxr-xr-xsrc/dynarec/arm64_lock_helper.h31
-rwxr-xr-xsrc/dynarec/arm64_next.S47
-rwxr-xr-xsrc/dynarec/arm64_printer.c14
-rw-r--r--src/dynarec/arm64_printer.h6
-rwxr-xr-xsrc/dynarec/arm64_prolog.S53
-rwxr-xr-xsrc/dynarec/dynablock.c422
-rwxr-xr-xsrc/dynarec/dynablock_private.h36
-rwxr-xr-xsrc/dynarec/dynarec.c38
-rwxr-xr-xsrc/dynarec/dynarec_arm64.c449
-rwxr-xr-xsrc/dynarec/dynarec_arm64_functions.c354
-rwxr-xr-xsrc/dynarec/dynarec_arm64_functions.h61
-rwxr-xr-xsrc/dynarec/dynarec_arm64_private.h56
-rwxr-xr-xsrc/dynarec/dynarec_private.h35
15 files changed, 1751 insertions, 19 deletions
diff --git a/src/dynarec/arm64_epilog.S b/src/dynarec/arm64_epilog.S
new file mode 100755
index 00000000..af39c1ba
--- /dev/null
+++ b/src/dynarec/arm64_epilog.S
@@ -0,0 +1,81 @@
+//arm64 epilog for dynarec
+//Write the emulated registers back to emu, restore the saved native registers and return
+//called with pointer to emu as 1st parameter (x0)
+//no 2nd parameter is used here
+
+.text
+.align 4
+
+.global arm64_epilog
+// arm64_epilog: leave generated code and return to the C caller.
+// In:    x0 = emu, x10-x27 = live emulated registers
+// Frame expected at entry:
+//        [sp + 0 .. sp + 143] = saved native x10-x27 (18 slots)
+//        [sp + 144]           = saved lr, [sp + 152] = saved fp
+//        (the lr/fp pair occupies its own 16-byte slot pushed below the original sp)
+arm64_epilog:
+    //update register -> emu
+    str     x10,  [x0, (8 *  0)]
+    str     x11,  [x0, (8 *  1)]
+    str     x12, [x0, (8 *  2)]
+    str     x13, [x0, (8 *  3)]
+    str     x14, [x0, (8 *  4)]
+    str     x15, [x0, (8 *  5)]
+    str     x16, [x0, (8 *  6)]
+    str     x17, [x0, (8 *  7)]
+    str     x18, [x0, (8 *  8)]
+    str     x19, [x0, (8 *  9)]
+    str     x20, [x0, (8 * 10)]
+    str     x21, [x0, (8 * 11)]
+    str     x22, [x0, (8 * 12)]
+    str     x23, [x0, (8 * 13)]
+    str     x24, [x0, (8 * 14)]
+    str     x25, [x0, (8 * 15)]
+    str     x26, [x0, (8 * 16)]
+    str     x27, [x0, (8 * 17)] // put back reg value in emu, up to slot 17
+                                // NOTE(review): slot 17 is written from x27, presumably RIP -- confirm against the emu layout
+    //restore all used register
+    //vpop     {d8-d15}
+    ldr     x10, [sp, (8 *  0)]
+    ldr     x11, [sp, (8 *  1)]
+    ldr     x12, [sp, (8 *  2)]
+    ldr     x13, [sp, (8 *  3)]
+    ldr     x14, [sp, (8 *  4)]
+    ldr     x15, [sp, (8 *  5)]
+    ldr     x16, [sp, (8 *  6)]
+    ldr     x17, [sp, (8 *  7)]
+    ldr     x18, [sp, (8 *  8)]
+    ldr     x19, [sp, (8 *  9)]
+    ldr     x20, [sp, (8 * 10)]
+    ldr     x21, [sp, (8 * 11)]
+    ldr     x22, [sp, (8 * 12)]
+    ldr     x23, [sp, (8 * 13)]
+    ldr     x24, [sp, (8 * 14)]
+    ldr     x25, [sp, (8 * 15)]
+    ldr     x26, [sp, (8 * 16)]
+    ldr     x27, [sp, (8 * 17)]
+    add     sp,  sp, (8 * 18)
+    // pop the lr/fp pair: load from the current sp, THEN release the 16 bytes
+    // (a pre-indexed "[sp, 16]!" would bump sp first and read the wrong slot)
+    ldp     lr, fp, [sp], 16
+    //end, return...
+    ret
+
+
+.global arm64_epilog_fast
+// arm64_epilog_fast: same exit path as the regular epilog, but without
+// writing the emulated registers back to emu.
+// Frame expected at entry:
+//        [sp + 0 .. sp + 143] = saved native x10-x27 (18 slots, slot i holds x(10+i))
+//        [sp + 144]           = saved lr, [sp + 152] = saved fp
+arm64_epilog_fast:
+    //restore all used register
+    //vpop     {d8-d15}
+    // slot i was filled from x(10+i); reload into the very same registers
+    ldr     x10, [sp, (8 *  0)]
+    ldr     x11, [sp, (8 *  1)]
+    ldr     x12, [sp, (8 *  2)]
+    ldr     x13, [sp, (8 *  3)]
+    ldr     x14, [sp, (8 *  4)]
+    ldr     x15, [sp, (8 *  5)]
+    ldr     x16, [sp, (8 *  6)]
+    ldr     x17, [sp, (8 *  7)]
+    ldr     x18, [sp, (8 *  8)]
+    ldr     x19, [sp, (8 *  9)]
+    ldr     x20, [sp, (8 * 10)]
+    ldr     x21, [sp, (8 * 11)]
+    ldr     x22, [sp, (8 * 12)]
+    ldr     x23, [sp, (8 * 13)]
+    ldr     x24, [sp, (8 * 14)]
+    ldr     x25, [sp, (8 * 15)]
+    ldr     x26, [sp, (8 * 16)]
+    ldr     x27, [sp, (8 * 17)]
+    add     sp,  sp, (8 * 18)
+    // pop the lr/fp pair: load from the current sp, then release the 16 bytes
+    ldp     lr, fp, [sp], 16
+    //end, return...
+    ret
diff --git a/src/dynarec/arm64_lock_helper.S b/src/dynarec/arm64_lock_helper.S
new file mode 100755
index 00000000..51b43316
--- /dev/null
+++ b/src/dynarec/arm64_lock_helper.S
@@ -0,0 +1,87 @@
+//arm64 lock helper
+//there are 2 parts: read (load-exclusive) and write (store-exclusive)
+// write returns 0 on success, 1 on failure (the value has been changed)
+
+.text
+.align 4
+
+.global arm64_lock_read_b
+.global arm64_lock_write_b
+.global arm64_lock_read_h
+.global arm64_lock_write_h
+.global arm64_lock_read_d
+.global arm64_lock_write_d
+.global arm64_lock_read_dd
+.global arm64_lock_write_dd
+.global arm64_lock_xchg
+.global arm64_lock_storeifnull
+
+
+// uint8_t arm64_lock_read_b(void* addr)
+// Load-acquire exclusive of one byte; opens the exclusive access that the
+// matching byte write helper completes.
+arm64_lock_read_b:
+    // address is x0, return is x0
+    ldaxrb  w0, [x0]
+    ret
+
+// int arm64_lock_write_b(void* addr, uint8_t val)
+// Store-release exclusive of one byte; returns the store status in w0.
+arm64_lock_write_b:
+    // address is x0, value is x1, return is x0
+    // the status lands in w0, so the address is moved to x2 first
+    // (status register and base register must not be the same)
+    mov     x2, x0
+    stlxrb  w0, w1, [x2]
+    ret
+
+// uint16_t arm64_lock_read_h(void* addr)
+// Load-acquire exclusive of one halfword (16 bits).
+arm64_lock_read_h:
+    // address is x0, return is x0
+    ldaxrh  w0, [x0]
+    ret
+
+// int arm64_lock_write_h(void* addr, uint16_t val)
+// Store-release exclusive of one halfword; returns the store status in w0.
+arm64_lock_write_h:
+    // address is x0, value is x1, return is x0
+    // address copied to x2 because w0 is overwritten with the status
+    mov     x2, x0
+    stlxrh  w0, w1, [x2]
+    ret
+
+// uint32_t arm64_lock_read_d(void* addr)
+// Load-acquire exclusive of one 32-bit word, as documented in the C header.
+// (The plain ldr that stood here did not open an exclusive access, so the
+// paired write could never detect a concurrent modification.)
+arm64_lock_read_d:
+    // address is x0, return is w0
+    ldaxr   w0, [x0]
+    ret
+
+// int arm64_lock_write_d(void* addr, uint32_t val)
+// Store-release exclusive of one 32-bit word.
+// Returns 0 when the store happened, 1 when it did not; it must follow
+// arm64_lock_read_d on the same address, like the b/h/dd variants.
+arm64_lock_write_d:
+    // address is x0, value is w1, return is w0
+    // address copied to x2 because w0 is overwritten with the status
+    mov     x2, x0
+    stlxr   w0, w1, [x2]
+    ret
+
+// uint64_t arm64_lock_read_dd(void* addr)
+// Load-acquire exclusive of one 64-bit doubleword.
+arm64_lock_read_dd:
+    // address is x0, return is x0
+    ldaxr   x0, [x0]
+    ret
+
+// int arm64_lock_write_dd(void* addr, uint64_t val)
+// Store-release exclusive of one 64-bit doubleword; returns the store status in w0.
+arm64_lock_write_dd:
+    // address is x0, value is x1, return is x0
+    // address copied to x2 because w0 is overwritten with the status
+    mov     x2, x0
+    stlxr   w0, x1, [x2]
+    ret
+
+// uintptr_t arm64_lock_xchg(void* p, uintptr_t val)
+// Atomically replace the 64-bit value at [x0] with x1 and return the old value.
+// The C prototype exchanges uintptr_t (pointer-sized) values, so the access
+// must use the full 64-bit x registers; 32-bit w accesses would truncate pointers.
+// Clobbers: x2, w3
+arm64_lock_xchg:
+    // address is x0, value is x1, return old value in x0
+    ldaxr   x2, [x0]
+    stlxr   w3, x1, [x0]
+    cbnz    w3, arm64_lock_xchg     // status != 0: store did not happen, retry
+    mov     x0, x2
+    ret
+
+// void* arm64_lock_storeifnull(void* p, void* val)
+// Store x1 to [x0] only when [x0] currently holds 0.
+// Returns the value now at [x0]: x1 when the store happened, the previous
+// non-zero content otherwise.
+// Clobbers: x2, w3
+arm64_lock_storeifnull:
+    ldaxr   x2, [x0]                    // x2 = current content
+    cbnz    x2, 1f                      // already set: hand back what is there
+    mov     x2, x1                      // x2 = value to publish (and to return)
+    stlxr   w3, x2, [x0]
+    cbnz    w3, arm64_lock_storeifnull  // store failed: start over
+1:
+    mov     x0, x2
+    ret
diff --git a/src/dynarec/arm64_lock_helper.h b/src/dynarec/arm64_lock_helper.h
new file mode 100755
index 00000000..a6879bea
--- /dev/null
+++ b/src/dynarec/arm64_lock_helper.h
@@ -0,0 +1,31 @@
+#ifndef __ARM64_LOCK_HELPER__H__
+#define __ARM64_LOCK_HELPER__H__
+#include <stdint.h>
+
+// Each read helper performs a load-exclusive and each write helper the
+// matching store-exclusive; a write returns 0 when the store happened.
+
+// LDAXRB of ADDR
+extern uint8_t arm64_lock_read_b(void* addr);
+// STLXRB of ADDR, return 0 if ok, 1 if not
+extern int arm64_lock_write_b(void* addr, uint8_t val);
+
+// LDAXRH of ADDR
+extern uint16_t arm64_lock_read_h(void* addr);
+// STLXRH of ADDR, return 0 if ok, 1 if not
+extern int arm64_lock_write_h(void* addr, uint16_t val);
+
+// LDAXR (32 bits) of ADDR
+extern uint32_t arm64_lock_read_d(void* addr);
+// STLXR (32 bits) of ADDR, return 0 if ok, 1 if not
+extern int arm64_lock_write_d(void* addr, uint32_t val);
+
+// LDAXR (64 bits) of ADDR
+extern uint64_t arm64_lock_read_dd(void* addr);
+// STLXR (64 bits) of ADDR, return 0 if ok, 1 if not
+extern int arm64_lock_write_dd(void* addr, uint64_t val);
+
+// Atomically exchange value at [p] with val, return old [p] value
+extern uintptr_t arm64_lock_xchg(void* p, uintptr_t val);
+
+// Atomically store value to [p] only if [p] is NULL. Return new [p] value (so val or old)
+extern void* arm64_lock_storeifnull(void*p, void* val);
+
+#endif  //__ARM64_LOCK_HELPER__H__
\ No newline at end of file
diff --git a/src/dynarec/arm64_next.S b/src/dynarec/arm64_next.S
new file mode 100755
index 00000000..2410750c
--- /dev/null
+++ b/src/dynarec/arm64_next.S
@@ -0,0 +1,47 @@
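+//arm64 "next block" stub for dynarec: calls LinkNext and jumps to the address it returns
+//called with pointer to emu as 1st parameter (x0)
+//and the address to continue at as 2nd parameter (x1)
+//x2 is handed to LinkNext untouched as its 3rd parameter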
+
+.text
+.align 4
+
+.extern LinkNext
+
+.global arm64_next
+// arm64_next: call LinkNext(x0, x1, x2) and branch to the address it returns.
+// In:    x0 = emu, x1 = IP address, x2 = passed through to LinkNext
+// Out:   does not return; continues at the address returned by LinkNext
+// Saves: x0, x1 and x10-x18 around the call (AAPCS64 caller-saved registers;
+//        x19 and above are callee-saved, so LinkNext preserves them)
+// Clobb: x3
+// NOTE(review): bl overwrites x30 (lr) and it is not saved here.
+arm64_next:
+    // emu is x0
+    // don't put put back reg value in emu, faster but more tricky to debug
+    // IP address is x1
+    // 11 slots are used, but 12 are reserved: sp must stay 16-byte aligned
+    // whenever it is used as a base address (8 * 11 = 88 would misalign it)
+    sub     sp,  sp, (8 * 12)
+    str     x0,  [sp, (8 *  0)]
+    str     x1,  [sp, (8 *  1)]
+    str     x10, [sp, (8 *  2)]
+    str     x11, [sp, (8 *  3)]
+    str     x12, [sp, (8 *  4)]
+    str     x13, [sp, (8 *  5)]
+    str     x14, [sp, (8 *  6)]
+    str     x15, [sp, (8 *  7)]
+    str     x16, [sp, (8 *  8)]
+    str     x17, [sp, (8 *  9)]
+    str     x18, [sp, (8 * 10)]
+    // call the function
+    bl      LinkNext
+    // preserve return value
+    mov     x3, x0
+    // pop regs
+    ldr     x0,  [sp, (8 *  0)]
+    ldr     x1,  [sp, (8 *  1)]
+    ldr     x10, [sp, (8 *  2)]
+    ldr     x11, [sp, (8 *  3)]
+    ldr     x12, [sp, (8 *  4)]
+    ldr     x13, [sp, (8 *  5)]
+    ldr     x14, [sp, (8 *  6)]
+    ldr     x15, [sp, (8 *  7)]
+    ldr     x16, [sp, (8 *  8)]
+    ldr     x17, [sp, (8 *  9)]
+    ldr     x18, [sp, (8 * 10)]
+    add     sp,  sp, (8 * 12)
+    // return offset is jump address
+    br      x3
+
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
new file mode 100755
index 00000000..49539a1a
--- /dev/null
+++ b/src/dynarec/arm64_printer.c
@@ -0,0 +1,14 @@
+#include <stdint.h>

+#include <stddef.h>

+#include <string.h>

+#include <stdio.h>

+

+#include "arm64_printer.h"

+

+// Format an ARM64 opcode for the dynarec dump.
+// No decoding is done yet: the opcode is printed as 8 zero-padded hex digits
+// followed by "???".
+// Returns a pointer to a static buffer that is overwritten by the next call.
+const char* arm64_print(uint32_t opcode)
+{
+    static char buff[200];
+
+    // %08X, not %8X: a space-padded value after the "0x" prefix would read "0x      1F"
+    snprintf(buff, sizeof(buff), "0x%08X ???", opcode);
+    return buff;
+}
\ No newline at end of file
diff --git a/src/dynarec/arm64_printer.h b/src/dynarec/arm64_printer.h
new file mode 100644
index 00000000..6fe21c33
--- /dev/null
+++ b/src/dynarec/arm64_printer.h
@@ -0,0 +1,6 @@
+#ifndef _ARM_PRINTER_H_
+#define _ARM_PRINTER_H_
+
+#include <stdint.h>   // uint32_t, so the header can be included on its own
+
+// Returns a printable form of opcode.
+const char* arm64_print(uint32_t opcode);
+
+#endif //_ARM_PRINTER_H_
diff --git a/src/dynarec/arm64_prolog.S b/src/dynarec/arm64_prolog.S
new file mode 100755
index 00000000..f480f2ea
--- /dev/null
+++ b/src/dynarec/arm64_prolog.S
@@ -0,0 +1,53 @@
+//arm prologue for dynarec
+//Save stuff, prepare stack and register
+//called with pointer to emu as 1st parameter
+//and address to jump to as 2nd parameter
+
+.text
+.align 4
+
+.global arm64_prolog
+// void arm64_prolog(x64emu_t* emu, void* addr)
+// Enter generated code.
+// In:    x0 = emu, x1 = native address to jump to
+// Frame built here (sp stays 16-byte aligned: 16 + 144 bytes):
+//        [sp + 0 .. sp + 143] = saved native x10-x27 (18 slots, slot i holds x(10+i))
+//        [sp + 144]           = saved lr, [sp + 152] = saved fp
+// Then x10-x27 are loaded from the first 18 qwords of emu and control goes to x1.
+arm64_prolog:
+    //save all 18 used register
+    // push lr/fp BELOW the current sp (pre-decrement); a positive pre-index
+    // would move sp upward and overwrite the caller's frame
+    stp     lr, fp, [sp, -16]!  // save lr
+    sub     sp,  sp, (8 * 18)
+    str     x10, [sp, (8 *  0)]
+    str     x11, [sp, (8 *  1)]
+    str     x12, [sp, (8 *  2)]
+    str     x13, [sp, (8 *  3)]
+    str     x14, [sp, (8 *  4)]
+    str     x15, [sp, (8 *  5)]
+    str     x16, [sp, (8 *  6)]
+    str     x17, [sp, (8 *  7)]
+    str     x18, [sp, (8 *  8)]
+    str     x19, [sp, (8 *  9)]
+    str     x20, [sp, (8 * 10)]
+    str     x21, [sp, (8 * 11)]
+    str     x22, [sp, (8 * 12)]
+    str     x23, [sp, (8 * 13)]
+    str     x24, [sp, (8 * 14)]
+    str     x25, [sp, (8 * 15)]
+    str     x26, [sp, (8 * 16)]
+    str     x27, [sp, (8 * 17)]
+    //vpush     {d8-d15}    // save NEON regs?
+    //setup emu -> register
+    ldr     x10, [x0, (8 *  0)]
+    ldr     x11, [x0, (8 *  1)]
+    ldr     x12, [x0, (8 *  2)]
+    ldr     x13, [x0, (8 *  3)]
+    ldr     x14, [x0, (8 *  4)]
+    ldr     x15, [x0, (8 *  5)]
+    ldr     x16, [x0, (8 *  6)]
+    ldr     x17, [x0, (8 *  7)]
+    ldr     x18, [x0, (8 *  8)]
+    ldr     x19, [x0, (8 *  9)]
+    ldr     x20, [x0, (8 * 10)]
+    ldr     x21, [x0, (8 * 11)]
+    ldr     x22, [x0, (8 * 12)]
+    ldr     x23, [x0, (8 * 13)]
+    ldr     x24, [x0, (8 * 14)]
+    ldr     x25, [x0, (8 * 15)]
+    ldr     x26, [x0, (8 * 16)]
+    ldr     x27, [x0, (8 * 17)]
+    //jump to function
+    br       x1
diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c
new file mode 100755
index 00000000..2ab39d09
--- /dev/null
+++ b/src/dynarec/dynablock.c
@@ -0,0 +1,422 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "tools/bridge_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynablock.h"
+#include "dynablock_private.h"
+#include "dynarec_private.h"
+#include "elfloader.h"
+#ifdef ARM64
+#include "dynarec_arm64.h"
+#include "arm64_lock_helper.h"
+#else
+#error Unsupported architecture!
+#endif
+#include "custommem.h"
+#include "khash.h"
+
+KHASH_MAP_INIT_INT(dynablocks, dynablock_t*)
+
+// X31 hash (h = h*31 + byte) over len bytes starting at addr.
+// Returns 0 for an empty (or negative) length.
+// The accumulator is unsigned: the hash is meant to wrap modulo 2^32, and
+// signed overflow / left shift of a negative int would be undefined behavior.
+uint32_t X31_hash_code(void* addr, int len)
+{
+    if(len<=0) return 0;
+    const uint8_t* p = (const uint8_t*)addr;
+    uint32_t h = *p;
+    // (h << 5) - h == h * 31
+    for (--len, ++p; len; --len, ++p) h = (h << 5) - h + (uint32_t)*p;
+    return h;
+}
+
+// Allocate a dynablock list covering [text, text+textsz).
+// When direct is non-zero the direct-mapping array (one slot per byte of the
+// range) is allocated immediately; otherwise it stays NULL.
+// Returns NULL when textsz is 0 or when the list itself cannot be allocated.
+dynablocklist_t* NewDynablockList(uintptr_t text, int textsz, int direct)
+{
+    if(!textsz) {
+        printf_log(LOG_NONE, "Error, creating a NULL sized Dynablock\n");
+        return NULL;
+    }
+    dynablocklist_t* ret = (dynablocklist_t*)calloc(1, sizeof(dynablocklist_t));
+    if(!ret) {
+        // don't dereference a failed allocation
+        printf_log(LOG_NONE, "Error, fail to allocate Dynablocklist for %p\n", (void*)text);
+        return NULL;
+    }
+    ret->text = text;
+    ret->textsz = textsz;
+    if(direct) {
+        ret->direct = (dynablock_t**)calloc(textsz, sizeof(dynablock_t*));
+        // not fatal: the list is still returned, just without the direct array
+        if(!ret->direct) {printf_log(LOG_NONE, "Warning, fail to create direct block for dynablock @%p\n", (void*)text);}
+    }
+    dynarec_log(LOG_DEBUG, "New Dynablocklist %p, from %p->%p\n", ret, (void*)text, (void*)(text+textsz));
+    return ret;
+}
+
+// Free one dynablock: clear its direct-map slot and jump-table entry,
+// recursively free its sons, then release its memory.
+// A NULL db, or a db already flagged as gone, is ignored.
+void FreeDynablock(dynablock_t* db)
+{
+    if(db) {
+        dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p father=%p, with %d son(s) already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size, db->father, db->sons_size, db->gone);
+        if(db->gone)
+            return; // already in the process of deletion!
+        db->done = 0;
+        db->gone = 1;
+        // remove from direct if there
+        uintptr_t startdb = db->parent->text;
+        uintptr_t enddb = db->parent->text + db->parent->textsz;
+        if(db->parent->direct) {
+            uintptr_t addr = (uintptr_t)db->x64_addr;
+            // only clear the slot when the block address lies inside the parent's range
+            if(addr>=startdb && addr<enddb)
+                db->parent->direct[addr-startdb] = NULL;
+        }
+        // remove jumptable
+        setJumpTableDefault64(db->x64_addr);
+        // remove and free the sons
+        for (int i=0; i<db->sons_size; ++i) {
+            // detach the son from the array first, then free it
+            dynablock_t *son = (dynablock_t*)arm64_lock_xchg(&db->sons[i], 0);
+            FreeDynablock(son);
+        }
+        // only the father free the DynarecMap
+        if(!db->father) {
+            dynarec_log(LOG_DEBUG, " -- FreeDyrecMap(%p, %d)\n", db->block, db->size);
+            FreeDynarecMap(db, (uintptr_t)db->block, db->size);
+        }
+        free(db->sons);
+        free(db->instsize);
+        free(db);
+    }
+}
+
+// Destroy a dynablock list: free every father block referenced by the direct
+// array, then the array, then the list itself; *dynablocks is reset to NULL.
+void FreeDynablockList(dynablocklist_t** dynablocks)
+{
+    if(!dynablocks || !*dynablocks)
+        return;
+    dynablocklist_t* list = *dynablocks;
+    dynarec_log(LOG_DEBUG, "Free Dynablocklist %p, with Direct Blocks %p\n", list, list->direct);
+    if(list->direct) {
+        for (int i=0; i<list->textsz; ++i) {
+            dynablock_t* db = list->direct[i];
+            // sons are released through their father, skip them here
+            if(db && !db->father)
+                FreeDynablock(db);
+        }
+        free(list->direct);
+        list->direct = NULL;
+    }
+
+    free(list);
+    *dynablocks = NULL;
+}
+
+// Flag a block (its father, when db is a son) as needing a test, and reset
+// the jump-table entries of the father and of all its sons to default.
+// Does nothing when db is NULL or the father is already flagged.
+void MarkDynablock(dynablock_t* db)
+{
+    if(!db)
+        return;
+    // mark only father
+    dynablock_t* top = db->father ? db->father : db;
+    if(top->need_test)
+        return; // already done
+    top->need_test = 1;  // test only blocks that can be marked (and so deleted)
+    setJumpTableDefault64(top->x64_addr);
+    for(int s=0; s<top->sons_size; ++s)
+        setJumpTableDefault64(top->sons[s]->x64_addr);
+}
+
+// First address covered by the list, or 0 when db is NULL.
+uintptr_t StartDynablockList(dynablocklist_t* db)
+{
+    return db ? db->text : 0;
+}
+// Last address covered by the list (inclusive), or 0 when db is NULL.
+uintptr_t EndDynablockList(dynablocklist_t* db)
+{
+    return db ? (db->text+db->textsz-1) : 0;
+}
+
+// Return 1 when the closed intervals [start1, end1] and [start2, end2]
+// share at least one point, 0 otherwise.
+int IntervalIntersects(uintptr_t start1, uintptr_t end1, uintptr_t start2, uintptr_t end2)
+{
+    // they overlap exactly when each one starts no later than the other ends
+    return (start1 <= end2 && start2 <= end1) ? 1 : 0;
+}
+
+// Mark every block registered in the direct array of dynablocks whose slot
+// lies in [addr, addr+size-1] (clipped to the range covered by the list).
+// Does nothing for a NULL list, a list without direct array, or size 0.
+void MarkDirectDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size)
+{
+    if(!dynablocks)
+        return;
+    if(!dynablocks->direct)
+        return;
+    if(!size)
+        return;
+    // all four bounds below are INCLUSIVE
+    uintptr_t startdb = dynablocks->text;
+    uintptr_t enddb = startdb + dynablocks->textsz -1;
+    uintptr_t start = addr;
+    uintptr_t end = addr+size-1;
+    if(end<addr)
+        end = ~(uintptr_t)0;    // range wrapped around the address space
+    if(start>enddb || end<startdb)
+        return; // no overlap with this list
+    if(start<startdb)
+        start = startdb;
+    if(end>enddb)
+        end = enddb;
+    // scan start..end including end itself; exit is tested after the body so
+    // the loop also terminates when end is the highest possible address
+    for(uintptr_t i = start; ; ++i) {
+        dynablock_t *db = dynablocks->direct[i-startdb];
+        // the interval test is kept exactly as it was (slightly over-inclusive upper bound)
+        if(db && IntervalIntersects((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr+db->x64_size-1, addr, addr+size+1))
+            MarkDynablock(db);
+        if(i==end)
+            break;
+    }
+}
+
+// Free every block of dynablocks that has a direct-array slot inside
+// [addr, addr+size), clipped to the range covered by the list.
+// Slots are first emptied and their fathers collected in a temporary hash
+// map (so each father is freed only once), then the collected blocks are freed.
+void FreeRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size)
+{
+    if(!dynablocks)
+        return;
+
+    if(dynablocks->direct) {
+        dynablock_t* db;
+        int ret;
+        khint_t k;
+        kh_dynablocks_t *blocks = kh_init(dynablocks);
+        // copy in a temporary list
+        if(dynablocks->direct) {
+            // here enddb and end are exclusive bounds
+            uintptr_t startdb = dynablocks->text;
+            uintptr_t enddb = startdb + dynablocks->textsz;
+            uintptr_t start = addr;
+            uintptr_t end = addr+size;
+            if(start<startdb)
+                start = startdb;
+            if(end>enddb)
+                end = enddb;
+            if(end>startdb && start<enddb)
+                for(uintptr_t i = start; i<end; ++i) {
+                    // take the block out of the slot atomically
+                    db = (dynablock_t*)arm64_lock_xchg(&dynablocks->direct[i-startdb], 0);
+                    if(db) {
+                        if(db->father)
+                            db = db->father;
+                        if(db->parent==dynablocks) {
+                            // NOTE(review): the map is declared with KHASH_MAP_INIT_INT; if that means a
+                            // 32-bit key, two pointers sharing their low 32 bits would collide -- confirm
+                            k = kh_put(dynablocks, blocks, (uintptr_t)db, &ret);
+                            kh_value(blocks, k) = db;
+                        }
+                    }
+                }
+        }
+        // purge the list
+        kh_foreach_value(blocks, db,
+            FreeDynablock(db);
+        );
+        kh_destroy(dynablocks, blocks);
+    }
+}
+// Mark the blocks that may overlap [addr, addr+size).
+// The range is widened downward by maxsz so that blocks starting before addr
+// are caught too, and the lists of the preceding map pages are also scanned.
+void MarkRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size)
+{
+    if(!dynablocks)
+        return;
+    if(dynablocks->direct) {
+        // never widen below address 0: an unsigned underflow would produce a bogus huge range
+        uintptr_t back = (uintptr_t)dynablocks->maxsz;
+        if(back>addr)
+            back = addr;
+        uintptr_t new_addr = addr - back;
+        uintptr_t new_size = size + back;
+        MarkDirectDynablock(dynablocks, new_addr, new_size);
+        // the blocks check before
+        // idx is pointer-sized: an int would truncate the page index of a 64-bit address
+        for(uintptr_t idx=(new_addr)>>DYNAMAP_SHIFT; idx<(addr>>DYNAMAP_SHIFT); ++idx)
+            MarkDirectDynablock(getDB(idx), new_addr, new_size);
+    }
+}
+
+// Look for the block whose native code range [block, block+size) contains addr.
+// Returns the father of the matching block (or the block itself when it has
+// no father), NULL when dynablocks is NULL or nothing matches.
+dynablock_t* FindDynablockDynablocklist(void* addr, kh_dynablocks_t* dynablocks)
+{
+    if(!dynablocks)
+        return NULL;
+    dynablock_t* db;
+    // linear walk over every value stored in the hash map
+    kh_foreach_value(dynablocks, db, 
+        const uintptr_t s = (uintptr_t)db->block;
+        const uintptr_t e = (uintptr_t)db->block+db->size;
+        if((uintptr_t)addr>=s && (uintptr_t)addr<e)
+            return db->father?db->father:db;
+    )
+    return NULL;
+}
+
+// Dynablock list registered for the map page that contains addr.
+static dynablocklist_t* getDBFromAddress(uintptr_t addr)
+{
+    return getDB(addr>>DYNAMAP_SHIFT);
+}
+
+// Get, or create, the block registered at addr in the direct array.
+// In:  *created != 0 allows creation, *created == 0 only looks up.
+// Out: *created = 1 when a fresh (still empty) block was registered by this
+//      call, 0 otherwise.
+// Returns the block, or NULL when there is none and none could be created
+// (NULL list, creation not allowed, or allocation failure).
+dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* created)
+{
+    if(!dynablocks) {
+        dynarec_log(LOG_INFO, "Warning: Ask to create a dynablock with a NULL dynablocklist (addr=%p)\n", (void*)addr);
+        *created = 0;
+        return NULL;
+    }
+    if((addr<dynablocks->text) || (addr>=(dynablocks->text+dynablocks->textsz))) {
+        // this should be useless
+        //dynarec_log(LOG_INFO, "Warning: Refused to create a Direct Block that is out-of-bound: dynablocks=%p (%p:%p), addr=%p\n", dynablocks, (void*)(dynablocks->text), (void*)(dynablocks->text+dynablocks->textsz), (void*)addr);
+        //*created = 0;
+        //return NULL;
+        return AddNewDynablock(getDBFromAddress(addr), addr, created);
+    }
+    dynablock_t* block = NULL;
+    // first, check if it exist in direct access mode
+    if(dynablocks->direct) {
+        block = dynablocks->direct[addr-dynablocks->text];
+        if(block) {
+            dynarec_log(LOG_DUMP, "Block already exist in Direct Map\n");
+            *created = 0;
+            return block;
+        }
+    }
+    
+    if (!*created)
+        return block;
+    
+    if(!dynablocks->direct) {
+        dynablock_t** p = (dynablock_t**)calloc(dynablocks->textsz, sizeof(dynablock_t*));
+        if(p && arm64_lock_storeifnull(&dynablocks->direct, p)!=p)
+            free(p);    // someone already create the direct array, too late...
+        if(!dynablocks->direct) {
+            // allocation failed and nobody else installed an array: cannot register a block
+            printf_log(LOG_NONE, "Warning, fail to create direct block for dynablock @%p\n", (void*)dynablocks->text);
+            *created = 0;
+            return NULL;
+        }
+    }
+
+    // create and add new block
+    dynarec_log(LOG_DUMP, "Ask for DynaRec Block creation @%p\n", (void*)addr);
+
+    block = (dynablock_t*)calloc(1, sizeof(dynablock_t));
+    if(!block) {
+        printf_log(LOG_NONE, "Warning, fail to allocate a dynablock for %p\n", (void*)addr);
+        *created = 0;
+        return NULL;
+    }
+    block->parent = dynablocks; 
+    // publish the block only if the slot is still empty
+    dynablock_t* tmp = (dynablock_t*)arm64_lock_storeifnull(&dynablocks->direct[addr-dynablocks->text], block);
+    if(tmp !=  block) {
+        // a block appeard!
+        free(block);
+        *created = 0;
+        return tmp;
+    }
+
+    *created = 1;
+    return block;
+}
+
+/* 
+    Find the block registered for addr, creating it when create is non-zero.
+    current (may be NULL) is only a hint: its parent list is tried first.
+    return NULL if block is not found / cannot be created. 
+    Don't create if create==0
+*/
+static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr, int create, dynablock_t* current)
+{
+    // try the quickest way first: get parent of current and check if ok!
+    dynablocklist_t *dynablocks = NULL;
+    dynablock_t* block = NULL;
+    if(current) {
+        dynablocks = current->parent;
+        if(dynablocks && !(addr>=dynablocks->text && addr<(dynablocks->text+dynablocks->textsz)))
+            dynablocks = NULL;
+    }
+    // nope, lets do the long way
+    if(!dynablocks) {
+        dynablocks = getDBFromAddress(addr);
+        if(!dynablocks) {
+            dynablocks = GetDynablocksFromAddress(emu->context, addr);
+            if(!dynablocks)
+                return NULL;
+        }
+    }
+    // check direct first, without lock
+    if(dynablocks->direct/* && (addr>=dynablocks->text) && (addr<(dynablocks->text+dynablocks->textsz))*/)
+        if((block = dynablocks->direct[addr-dynablocks->text]))
+            return block;
+
+    int created = create;
+    block = AddNewDynablock(dynablocks, addr, &created);
+    if(!created)
+        return block;   // existing block...
+
+    if(box64_dynarec_dump)
+        pthread_mutex_lock(&my_context->mutex_dyndump);
+    // fill the block
+    block->x64_addr = (void*)addr;
+    // the block filler is not plugged in yet, so this failure path is currently dead
+    if(0/*!FillBlock64(block, filladdr)*/) {
+        void* old = (void*)arm64_lock_xchg(&dynablocks->direct[addr-dynablocks->text], 0);
+        if(old!=block && old) {// put it back in place, strange things are happening here!
+            dynarec_log(LOG_INFO, "Warning, a wild block appeared at %p: %p\n", (void*)addr, old);
+            arm64_lock_xchg(&dynablocks->direct[addr-dynablocks->text], (uintptr_t)old);
+        }
+        free(block);
+        block = NULL;
+    }
+    if(box64_dynarec_dump)
+        pthread_mutex_unlock(&my_context->mutex_dyndump);
+    // check size
+    if(block && block->x64_size) {
+        int blocksz = block->x64_size;
+        if(dynablocks->maxsz<blocksz) {
+            dynablocks->maxsz = blocksz;
+            // propagate the new max size to the lists of the following map pages the block spills into
+            // idx is pointer-sized: an int would truncate the page index of a 64-bit address
+            for(uintptr_t idx=(addr>>DYNAMAP_SHIFT)+1; idx<=((addr+blocksz)>>DYNAMAP_SHIFT); ++idx) {
+                dynablocklist_t* dblist;
+                if((dblist = getDB(idx)))
+                    if(dblist->maxsz<blocksz)
+                        dblist->maxsz = blocksz;
+            }
+        }
+        lockDB();
+        protectDBnolock((uintptr_t)block->x64_addr, block->x64_size);
+        // fill-in jumptable
+        addJumpTableIfDefault64(block->x64_addr, block->block);
+        for(int i=0; i<block->sons_size; ++i)
+            addJumpTableIfDefault64(block->sons[i]->x64_addr, block->sons[i]->block);
+        unlockDB();
+    }
+
+    dynarec_log(LOG_DEBUG, " --- DynaRec Block %s @%p:%p (%p, 0x%x bytes, with %d son(s))\n", created?"created":"recycled", (void*)addr, (void*)(addr+((block)?block->x64_size:0)), (block)?block->block:0, (block)?block->size:0, (block)?block->sons_size:0);
+
+    return block;
+}
+
+// Get (and create when create != 0) the block for addr.
+// When the block, or its father, is flagged need_test, the x64 code is hashed
+// again: on mismatch the father is freed and the lookup restarted, otherwise
+// the flag is cleared and protection / jump-table entries are put back.
+// *current is a lookup hint; it is reset to NULL when the block it points to
+// is invalidated here.
+dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, dynablock_t** current)
+{
+    dynablock_t *db = internalDBGetBlock(emu, addr, addr, create, *current);
+    if(db && db->done && db->block && (db->need_test || (db->father && db->father->need_test))) {
+        dynablock_t *father = db->father?db->father:db;
+        uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size);
+        if(hash!=father->hash) {
+            dynarec_log(LOG_DEBUG, "Invalidating block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size, hash, father->hash, father->sons_size, (void*)addr);
+            // no more current if it gets invalidated too
+            // (the previous test, start>=cur && end<cur, could never be true and
+            //  left *current pointing at memory released just below)
+            if(*current) {
+                dynablock_t *cur = *current;
+                uintptr_t fstart = (uintptr_t)father->x64_addr;
+                uintptr_t caddr = (uintptr_t)cur->x64_addr;
+                if(cur==father || cur->father==father || (caddr>=fstart && caddr<fstart+father->x64_size))
+                    *current = NULL;
+            }
+            // Free father, it's now invalid!
+            FreeDynablock(father);
+            // start again... (will create a new block)
+            db = internalDBGetBlock(emu, addr, addr, create, *current);
+        } else {
+            father->need_test = 0;
+            lockDB();
+            protectDBnolock((uintptr_t)father->x64_addr, father->x64_size);
+            // fill back jumptable
+            addJumpTableIfDefault64(father->x64_addr, father->block);
+            for(int i=0; i<father->sons_size; ++i)
+                addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block);
+            unlockDB();
+        }
+    } 
+    return db;
+}
+
+// Get or create the block registered at addr, passing filladdr as the fill
+// address; creation is always allowed and no "current" hint is used.
+// A block flagged need_test is re-hashed: on mismatch the father is freed and
+// the lookup restarted, otherwise the flag is cleared and protection /
+// jump-table entries are put back.
+dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr)
+{
+    dynarec_log(LOG_DEBUG, "Creating AlternateBlock at %p for %p\n", (void*)addr, (void*)filladdr);
+    int create = 1;
+    dynablock_t *db = internalDBGetBlock(emu, addr, filladdr, create, NULL);
+    if(db && db->done && db->block && (db->need_test || (db->father && db->father->need_test))) {
+        // the test flag and the hash live on the father
+        dynablock_t *father = db->father?db->father:db;
+        uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size);
+        if(hash!=father->hash) {
+            dynarec_log(LOG_DEBUG, "Invalidating alt block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size, hash, father->hash, father->sons_size, (void*)addr);
+            // Free father, it's now invalid!
+            FreeDynablock(father);
+            // start again... (will create a new block)
+            db = internalDBGetBlock(emu, addr, filladdr, create, NULL);
+        } else {
+            // code unchanged: the block is valid again
+            father->need_test = 0;
+            lockDB();
+            protectDBnolock((uintptr_t)father->x64_addr, father->x64_size);
+            // fill back jumptable
+            addJumpTableIfDefault64(father->x64_addr, father->block);
+            for(int i=0; i<father->sons_size; ++i)
+                addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block);
+            unlockDB();
+        }
+    } 
+    return db;
+}
diff --git a/src/dynarec/dynablock_private.h b/src/dynarec/dynablock_private.h
new file mode 100755
index 00000000..dd2ee4c0
--- /dev/null
+++ b/src/dynarec/dynablock_private.h
@@ -0,0 +1,36 @@
+#ifndef __DYNABLOCK_PRIVATE_H_
+#define __DYNABLOCK_PRIVATE_H_
+
+#include <stdint.h>   // uintptr_t, uint32_t, uint8_t
+
+// forward declarations: both structs refer to each other (and dynablock_s to itself)
+typedef struct dynablocklist_s  dynablocklist_t;
+typedef struct dynablock_s      dynablock_t;
+
+// per-instruction size pair, 4 bits each
+typedef struct instsize_s {
+    unsigned int x64:4;
+    unsigned int nat:4;
+} instsize_t;
+
+typedef struct dynablock_s {
+    dynablocklist_t* parent;    // list this block is registered in
+    void*           block;      // start of the native code
+    int             size;       // size of the native code
+    void*           x64_addr;   // start of the x64 code
+    uintptr_t       x64_size;   // size of the x64 code
+    uint32_t        hash;       // compared with X31_hash_code of the x64 code to detect changes
+    uint8_t         need_test;  // hash must be checked again before use
+    uint8_t         done;
+    uint8_t         gone;       // set while the block is being freed
+    uint8_t         dummy;
+    int             isize;
+    dynablock_t**   sons;   // sons (kind-of dummy dynablock...)
+    int             sons_size;
+    dynablock_t*    father; // set only in the case of a son
+    instsize_t*     instsize;
+} dynablock_t;
+
+typedef struct dynablocklist_s {
+    uintptr_t           text;      // first address covered
+    int                 textsz;    // number of bytes covered
+    int                 maxsz;     // maxblock size (for this block or previous block)
+    dynablock_t**       direct;    // direct mapping (waste of space, so the array is created at first write)
+} dynablocklist_t;
+
+#endif //__DYNABLOCK_PRIVATE_H_
\ No newline at end of file
diff --git a/src/dynarec/dynarec.c b/src/dynarec/dynarec.c
index c88cd61f..8769bc1e 100755
--- a/src/dynarec/dynarec.c
+++ b/src/dynarec/dynarec.c
@@ -22,10 +22,10 @@
 #endif
 
 #ifdef DYNAREC
-#ifdef ARM
-void arm_prolog(x64emu_t* emu, void* addr) EXPORTDYN;
-void arm_epilog() EXPORTDYN;
-void arm_epilog_fast() EXPORTDYN;
+#ifdef ARM64
+void arm64_prolog(x64emu_t* emu, void* addr) EXPORTDYN;
+void arm64_epilog() EXPORTDYN;
+void arm64_epilog_fast() EXPORTDYN;
 #endif
 #endif
 
@@ -39,7 +39,7 @@ void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2)
     if(!addr) {
         x2-=8;  // actual PC is 2 instructions ahead
         dynablock_t* db = FindDynablockFromNativeAddress(x2);
-        printf_log(LOG_NONE, "Warning, jumping to NULL address from %p (db=%p, x86addr=%p)\n", x2, db, db?(void*)getX86Address(db, (uintptr_t)x2):NULL);
+        printf_log(LOG_NONE, "Warning, jumping to NULL address from %p (db=%p, x64addr=%p)\n", x2, db, db?(void*)getX64Address(db, (uintptr_t)x2):NULL);
     }
     #endif
     dynablock_t* current = NULL;
@@ -47,17 +47,17 @@ void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2)
     dynablock_t* block = DBGetBlock(emu, addr, 1, &current);
     if(!block) {
         // no block, let link table as is...
-        //tableupdate(arm_epilog, addr, table);
-        return arm_epilog;
+        //tableupdate(arm64_epilog, addr, table);
+        return arm64_epilog;
     }
     if(!block->done) {
         // not finished yet... leave linker
         //tableupdate(arm_linker, addr, table);
-        return arm_epilog;
+        return arm64_epilog;
     }
     if(!(jblock=block->block)) {
         // null block, but done: go to epilog, no linker here
-        return arm_epilog;
+        return arm64_epilog;
     }
     //dynablock_t *father = block->father?block->father:block;
     return jblock;
@@ -82,7 +82,7 @@ void DynaCall(x64emu_t* emu, uintptr_t addr)
         }
     }
 #ifdef DYNAREC
-    if(!box86_dynarec)
+    if(!box64_dynarec)
 #endif
         EmuCall(emu, addr);
 #ifdef DYNAREC
@@ -107,18 +107,18 @@ void DynaCall(x64emu_t* emu, uintptr_t addr)
                 dynarec_log(LOG_DEBUG, "%04d|Calling Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu);
                 Run(emu, 1);
             } else {
-                dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x86 instructions (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,block->father, emu);
+                dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x64 instructions (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,block->father, emu);
                 CHECK_FLAGS(emu);
                 // block is here, let's run it!
-                #ifdef ARM
-                arm_prolog(emu, block->block);
+                #ifdef ARM64
+                arm64_prolog(emu, block->block);
                 #endif
             }
             if(emu->fork) {
                 int forktype = emu->fork;
                 emu->quit = 0;
                 emu->fork = 0;
-                emu = x86emu_fork(emu, forktype);
+                emu = x64emu_fork(emu, forktype);
                 if(emu->type == EMUTYPE_MAIN) {
                     ejb = GetJmpBuf();
                     ejb->emu = emu;
@@ -170,7 +170,7 @@ int DynaRun(x64emu_t* emu)
         }
     }
 #ifdef DYNAREC
-    if(!box86_dynarec)
+    if(!box64_dynarec)
 #endif
         return Run(emu, 0);
 #ifdef DYNAREC
@@ -186,17 +186,17 @@ int DynaRun(x64emu_t* emu)
                 dynarec_log(LOG_DEBUG, "%04d|Running Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu);
                 Run(emu, 1);
             } else {
-                dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x86 insts (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, block->father, emu);
+                dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x64 insts (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, block->father, emu);
                 // block is here, let's run it!
-                #ifdef ARM
-                arm_prolog(emu, block->block);
+                #ifdef ARM64
+                arm64_prolog(emu, block->block);
                 #endif
             }
             if(emu->fork) {
                 int forktype = emu->fork;
                 emu->quit = 0;
                 emu->fork = 0;
-                emu = x86emu_fork(emu, forktype);
+                emu = x64emu_fork(emu, forktype);
                 if(emu->type == EMUTYPE_MAIN) {
                     ejb = GetJmpBuf();
                     ejb->emu = emu;
diff --git a/src/dynarec/dynarec_arm64.c b/src/dynarec/dynarec_arm64.c
new file mode 100755
index 00000000..b3b0b06c
--- /dev/null
+++ b/src/dynarec/dynarec_arm64.c
@@ -0,0 +1,449 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <errno.h>
+#include <string.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "custommem.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "tools/bridge_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynablock.h"
+#include "dynablock_private.h"
+#include "dynarec_arm64.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+#include "elfloader.h"
+
+// Dump one x64 instruction to the dynarec log (bold ANSI escapes are used when box64_dynarec_dump>1)
+void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name) {
+    const char* bold_on  = (box64_dynarec_dump>1)?"\e[1m":"";
+    const char* bold_off = (box64_dynarec_dump>1)?"\e[m":"";
+    uint8_t *ip = (uint8_t*)inst->addr;
+    // 0xCC 'S' 'C' marker: the pointer-sized value that follows is printed as the native call target
+    if(ip[0]==0xcc && ip[1]=='S' && ip[2]=='C') {
+        uintptr_t a = *(uintptr_t*)(ip+3);
+        if(a) {
+            dynarec_log(LOG_NONE, "%s%p: Native call to %p%s\n", bold_on, (void*)ip, (void*)a, bold_off);
+        } else {
+            dynarec_log(LOG_NONE, "%s%p: Exit x64emu%s\n", bold_on, (void*)ip, bold_off);
+        }
+        return;
+    }
+    if(!dec) {
+        // no decoder available: print the raw bytes followed by the supplied name
+        dynarec_log(LOG_NONE, "%s%p: ", bold_on, ip);
+        for(int i=0; i<inst->size; ++i) {
+            dynarec_log(LOG_NONE, "%02X ", ip[i]);
+        }
+        dynarec_log(LOG_NONE, " %s", name);
+    } else {
+        dynarec_log(LOG_NONE, "%s%p: %s", bold_on, ip, DecodeX64Trace(dec, inst->addr));
+    }
+    // print the target function name if possible
+    switch(ip[0]) {
+        case 0xE8:  // Call rel32
+        case 0xE9:  // Jmp rel32
+            printFunctionAddr((uintptr_t)ip + 5 + *((int32_t*)(ip+1)), "=> ");
+            break;
+        case 0xFF:
+            if(ip[1]==0x25)
+                printFunctionAddr((uintptr_t)ip + 6 + *((int32_t*)(ip+2)), "=> ");
+            break;
+        default:
+            break;
+    }
+    // end of line and colors
+    dynarec_log(LOG_NONE, "%s\n", bold_off);
+}
+
+// Record addr in the list of pending "next" addresses of dyn.
+// An address already present in the list is not added a second time.
+void add_next(dynarec_arm_t *dyn, uintptr_t addr) {
+    // check for a duplicate first, so the array is never grown for nothing
+    for(int i=0; i<dyn->next_sz; ++i)
+        if(dyn->next[i]==addr)
+            return;
+    if(dyn->next_sz == dyn->next_cap) {
+        // grow by 16 entries; keep the old array if realloc fails instead of losing the pointer
+        int newcap = dyn->next_cap + 16;
+        uintptr_t* grown = (uintptr_t*)realloc(dyn->next, newcap*sizeof(uintptr_t));
+        if(!grown)
+            return; // out of memory: the list is left untouched and addr is not recorded
+        dyn->next = grown;
+        dyn->next_cap = newcap;
+    }
+    dyn->next[dyn->next_sz++] = addr;
+}
+// Return the smallest recorded "next" address that is >= addr (0 if there is none).
+// Entries lower than addr are removed from the list along the way.
+uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr) {
+    uintptr_t closest = 0;
+    // the scan stops early as soon as an exact match on addr has been selected
+    for(int idx=0; (idx<dyn->next_sz) && (closest!=addr); ) {
+        uintptr_t candidate = dyn->next[idx];
+        if(candidate<addr) {
+            // address is before the current one: drop it by shifting the tail down
+            memmove(dyn->next+idx, dyn->next+idx+1, (dyn->next_sz-idx-1)*sizeof(uintptr_t));
+            --dyn->next_sz;
+            continue;   // same index now holds the following entry
+        }
+        if(!closest || (candidate<closest))
+            closest = candidate;
+        ++idx;
+    }
+    return closest;
+}
+#define PK(A) (*((uint8_t*)(addr+(A))))
+// Return 1 if the n bytes at addr are entirely made of recognised NOP encodings, 0 otherwise.
+int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n)
+{
+    // each iteration consumes one recognised pattern; patterns are tested in a fixed order
+    while(n) {
+        int len;
+        if(PK(0)==0x90)
+            len = 1;
+        else if(n>1 && PK(0)==0x66)  // a 0x66 prefix with more bytes after it *can* be part of a NOP
+            len = 1;
+        else if(n>2 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x00)
+            len = 3;
+        else if(n>2 && PK(0)==0x8d && PK(1)==0x76 && PK(2)==0x00)    // lea esi, [esi]
+            len = 3;
+        else if(n>3 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x40 && PK(3)==0x00)
+            len = 4;
+        else if(n>3 && PK(0)==0x8d && PK(1)==0x74 && PK(2)==0x26 && PK(3)==0x00)
+            len = 4;
+        else if(n>4 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x44 && PK(3)==0x00 && PK(4)==0x00)
+            len = 5;
+        else if(n>5 && PK(0)==0x8d && PK(1)==0xb6 && PK(2)==0x00 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00)
+            len = 6;
+        else if(n>6 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x80 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00)
+            len = 7;
+        else if(n>6 && PK(0)==0x8d && PK(1)==0xb4 && PK(2)==0x26 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00) // lea esi, [esi+0]
+            len = 7;
+        else if(n>7 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x84 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00 && PK(7)==0x00)
+            len = 8;
+        else
+            return 0;   // unknown byte sequence: not a run of NOPs
+        addr += len;
+        n -= len;
+    }
+    return 1;
+}
+
+// Return the size in bytes of the instruction at addr, or -1 if unknown.
+// Only a subset of the opcodes is handled (no REX or other prefix besides 66 90).
+int next_instruction(dynarec_arm_t *dyn, uintptr_t addr)
+{
+    uint8_t opcode = PK(0);
+    uint8_t nextop;
+    switch (opcode) {
+        case 0x66:
+            opcode = PK(1);
+            switch(opcode) {
+                case 0x90:
+                    return 2;
+            }
+            break;
+        case 0x81:  // Ed, Id: opcode + modrm/sib/disp + imm32
+            nextop = PK(1);
+            return fakeed(dyn, addr+2, 0, nextop)-addr + 4;
+        case 0x83:  // Ed, Ib: opcode + modrm/sib/disp + imm8
+            nextop = PK(1);
+            return fakeed(dyn, addr+2, 0, nextop)-addr + 1;
+        case 0x84:
+        case 0x85:
+        case 0x88:
+        case 0x89:
+        case 0x8A:
+        case 0x8B:
+        case 0x8C:
+        case 0x8D:
+        case 0x8E:
+        case 0x8F:
+            nextop = PK(1);
+            return fakeed(dyn, addr+2, 0, nextop)-addr;
+        case 0x50:
+        case 0x51:
+        case 0x52:
+        case 0x53:
+        case 0x54:
+        case 0x55:
+        case 0x56:
+        case 0x57:
+        case 0x58:
+        case 0x59:
+        case 0x5A:
+        case 0x5B:
+        case 0x5C:
+        case 0x5D:
+        case 0x5E:
+        case 0x5F:
+        case 0x90:
+        case 0x91:
+        case 0x92:
+        case 0x93:
+        case 0x94:
+        case 0x95:
+        case 0x96:
+        case 0x97:
+        case 0x98:
+        case 0x99:
+        case 0x9B:
+        case 0x9C:
+        case 0x9D:
+        case 0x9E:
+        case 0x9F:
+            return 1;
+        case 0xA0:
+        case 0xA1:
+        case 0xA2:
+        case 0xA3:
+            // MOV AL/eAX <-> moffs: in 64-bit mode the memory offset is 8 bytes (no 0x67 prefix handled here)
+            return 9;
+        case 0xB0:
+        case 0xB1:
+        case 0xB2:
+        case 0xB3:
+        case 0xB4:
+        case 0xB5:
+        case 0xB6:
+        case 0xB7:
+            return 2;   // MOV r8, imm8
+        case 0xB8:
+        case 0xB9:
+        case 0xBA:
+        case 0xBB:
+        case 0xBC:
+        case 0xBD:
+        case 0xBE:
+        case 0xBF:
+            return 5;   // MOV r32, imm32 (no REX.W prefix handled here)
+        case 0xFF:
+            nextop = PK(1);
+            switch((nextop>>3)&7) {
+                case 0: // INC Ed
+                case 1: //DEC Ed
+                case 2: // CALL Ed
+                case 4: // JMP Ed
+                case 6: // Push Ed
+                    return fakeed(dyn, addr+2, 0, nextop)-addr;
+            }
+            break;
+        default:
+            break;
+    }
+    return -1;
+}
+#undef PK
+
+// Return 1 if the n bytes at addr decode (through next_instruction) as whole instructions
+// ending exactly at addr+n, 0 otherwise.
+int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n)
+{
+    int offset;
+    for(offset=0; offset<n; ) {
+        int len = next_instruction(dyn, addr+offset);
+        if(len<=0)
+            return 0;   // unknown instruction
+        offset += len;
+    }
+    return offset==n;
+}
+
+// Compute which flags are needed starting at instruction ninst.
+//  setf:    flags still being tracked (flags used by an instruction are removed from it,
+//           flags set by an instruction are added to it)
+//  recurse: jump-following depth, the walk gives up (X_PEND) once it reaches 10
+// Also fills the need_flags field of the instructions it walks through.
+uint32_t needed_flags(dynarec_arm_t *dyn, int ninst, uint32_t setf, int recurse)
+{
+    if(recurse == 10)
+        return X_PEND;
+    if(ninst == dyn->size)
+        return X_PEND; // no more instructions, or too many jmp loop, stop
+    
+    uint32_t needed = dyn->insts[ninst].x64.use_flags;
+    if(needed) {
+        setf &= ~needed;
+        if(!setf)   // all flags already used, no need to continue
+            return needed;
+    }
+
+    // run of instructions that neither use nor set flags and do not jump:
+    // skip to the end of the run and give every instruction of the run the same need_flags
+    if(!needed && !dyn->insts[ninst].x64.set_flags && !dyn->insts[ninst].x64.jmp_insts) {
+        int start = ninst;
+        int end = ninst;
+        while(end<dyn->size && !dyn->insts[end].x64.use_flags && !dyn->insts[end].x64.set_flags && !dyn->insts[end].x64.jmp_insts)
+            ++end;
+        needed = needed_flags(dyn, end, setf, recurse);
+        for(int i=start; i<end; ++i)
+            dyn->insts[i].x64.need_flags = needed;
+        return needed;
+    }
+
+    // instruction sets flags for sure (SF_MAYSET is excluded)
+    if(dyn->insts[ninst].x64.set_flags && (dyn->insts[ninst].x64.state_flags!=SF_MAYSET)) {
+        if((setf & ~dyn->insts[ninst].x64.set_flags) == 0)
+            return needed;    // all done, gives all the flags needed
+        setf |= dyn->insts[ninst].x64.set_flags;    // add new flags to continue
+    }
+
+    int jinst = dyn->insts[ninst].x64.jmp_insts;
+    if(dyn->insts[ninst].x64.jmp) {
+        // follow the jump (X_PEND when the target is outside of the block)...
+        dyn->insts[ninst].x64.need_flags = (jinst==-1)?X_PEND:needed_flags(dyn, jinst, setf, recurse+1);
+        // ...and also the fall-through path when the jump is conditional
+        if(dyn->insts[ninst].x64.use_flags)  // conditional jump
+             dyn->insts[ninst].x64.need_flags |= needed_flags(dyn, ninst+1, setf, recurse);
+    } else
+        dyn->insts[ninst].x64.need_flags = needed_flags(dyn, ninst+1, setf, recurse);
+    // flags that this instruction sets for sure are not needed from before it
+    if(dyn->insts[ninst].x64.state_flags==SF_MAYSET)
+        needed |= dyn->insts[ninst].x64.need_flags;
+    else
+        needed |= (dyn->insts[ninst].x64.need_flags & ~dyn->insts[ninst].x64.set_flags);
+    // X_PEND brings nothing more once all the flags are already needed
+    if(needed == (X_PEND|X_ALL))
+        needed = X_ALL;
+    return needed;
+}
+
+// Append the sizes of one instruction (x64 size, arm size) to the insts array.
+// A single entry stores at most 15 for each size, so larger values are spread over several
+// consecutive entries. *size and *cap are updated, and the (possibly reallocated) array is returned.
+instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, int arm_size)
+{
+    // x64 instruction is <16 bytes
+    const int largest = (x64_size>arm_size)?x64_size:arm_size;
+    int remaining = 1 + largest/15;
+    if((*size)+remaining>(*cap)) {
+        *cap = (*size)+remaining;
+        insts = (instsize_t*)realloc(insts, (*cap)*sizeof(instsize_t));
+    }
+    for(; remaining; --remaining) {
+        instsize_t* entry = &insts[*size];
+        entry->x64 = (x64_size>15)?15:x64_size;
+        x64_size -= entry->x64;
+        entry->nat = (arm_size>15)?15:arm_size;
+        arm_size -= entry->nat;
+        ++(*size);
+    }
+    return insts;
+}
+
+void arm_pass0(dynarec_arm_t* dyn, uintptr_t addr);
+void arm_pass1(dynarec_arm_t* dyn, uintptr_t addr);
+void arm_pass2(dynarec_arm_t* dyn, uintptr_t addr);
+void arm_pass3(dynarec_arm_t* dyn, uintptr_t addr);
+
+// Translate the x64 code starting at addr into a native block stored in "block".
+// Runs the 4 passes (0: count instructions, 1: addresses/jumps/flags, 2: sizes, 3: emit),
+// allocates the executable memory, then fills the block fields and its "sons".
+// Returns NULL when addr is in the no-dynarec range or when memory allocation fails,
+// the block itself otherwise (including for a null-sized block, which is just marked done).
+void* FillBlock(dynablock_t* block, uintptr_t addr) {
+    if(addr>=box64_nodynarec_start && addr<box64_nodynarec_end)
+        return NULL;
+    // init the helper
+    dynarec_arm_t helper = {0};
+    helper.start = addr;
+    arm_pass0(&helper, addr);
+    if(!helper.size) {
+        dynarec_log(LOG_DEBUG, "Warning, null-sized dynarec block (%p)\n", (void*)addr);
+        block->done = 1;
+        free(helper.next);
+        return (void*)block;
+    }
+    helper.cap = helper.size+3; // needs epilog handling
+    // NOTE(review): the result of calloc is not checked before use
+    helper.insts = (instruction_arm64_t*)calloc(helper.cap, sizeof(instruction_arm64_t));
+    // pass 1, addresses, x64 jump addresses, flags
+    arm_pass1(&helper, addr);
+    // calculate barriers
+    uintptr_t start = helper.insts[0].x64.addr;
+    // insts[helper.size] is one past the last counted instruction (within cap);
+    // presumably filled by pass 1 with the end address -- TODO confirm
+    uintptr_t end = helper.insts[helper.size].x64.addr+helper.insts[helper.size].x64.size;
+    for(int i=0; i<helper.size; ++i)
+        if(helper.insts[i].x64.jmp) {
+            uintptr_t j = helper.insts[i].x64.jmp;
+            if(j<start || j>=end)
+                helper.insts[i].x64.jmp_insts = -1;
+            else {
+                // find jump address instruction
+                int k=-1;
+                for(int i2=0; i2<helper.size && k==-1; ++i2) {
+                    if(helper.insts[i2].x64.addr==j)
+                        k=i2;
+                }
+                if(k!=-1)   // -1 if not found, mmm, probably wrong, exit anyway
+                    helper.insts[k].x64.barrier = 1;
+                helper.insts[i].x64.jmp_insts = k;
+            }
+        }
+    // compute need_flags for every instruction that sets flags and has none computed yet
+    for(int i=0; i<helper.size; ++i)
+        if(helper.insts[i].x64.set_flags && !helper.insts[i].x64.need_flags) {
+            helper.insts[i].x64.need_flags = needed_flags(&helper, i+1, helper.insts[i].x64.set_flags, 0);
+            if((helper.insts[i].x64.need_flags&X_PEND) && (helper.insts[i].x64.state_flags==SF_MAYSET))
+                helper.insts[i].x64.need_flags = X_ALL;
+        }
+    
+    // pass 2, instruction size
+    arm_pass2(&helper, addr);
+    // ok, now allocate mapped memory, with executable flag on
+    int sz = helper.arm_size;
+    void* p = (void*)AllocDynarecMap(block, sz);
+    if(p==NULL) {
+        dynarec_log(LOG_DEBUG, "AllocDynarecMap(%p, %d) failed, cancelling block\n", block, sz);
+        free(helper.insts);
+        free(helper.next);
+        return NULL;
+    }
+    helper.block = p;
+    helper.arm_start = (uintptr_t)p;
+    if(helper.sons_size) {
+        helper.sons_x64 = (uintptr_t*)calloc(helper.sons_size, sizeof(uintptr_t));
+        helper.sons_arm = (void**)calloc(helper.sons_size, sizeof(void*));
+    }
+    // pass 3, emit (log emit arm opcode)
+    if(box64_dynarec_dump) {
+        dynarec_log(LOG_NONE, "%s%04d|Emitting %d bytes for %d x64 bytes", (box64_dynarec_dump>1)?"\e[01;36m":"", GetTID(), helper.arm_size, helper.isize); 
+        printFunctionAddr(helper.start, " => ");
+        dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":"");
+    }
+    // arm_size is reset here so it can be compared after pass 3 with the size found by pass 2
+    helper.arm_size = 0;
+    arm_pass3(&helper, addr);
+    if(sz!=helper.arm_size) {
+        // pass 2 and pass 3 disagree: dump the x64 bytes with both sizes of each instruction
+        printf_log(LOG_NONE, "BOX64: Warning, size difference in block between pass2 (%d) & pass3 (%d)!\n", sz, helper.arm_size);
+        uint8_t *dump = (uint8_t*)helper.start;
+        printf_log(LOG_NONE, "Dump of %d x64 opcodes:\n", helper.size);
+        for(int i=0; i<helper.size; ++i) {
+            printf_log(LOG_NONE, "%p:", dump);
+            for(; dump<(uint8_t*)helper.insts[i+1].x64.addr; ++dump)
+                printf_log(LOG_NONE, " %02X", *dump);
+            printf_log(LOG_NONE, "\t%d -> %d\n", helper.insts[i].size2, helper.insts[i].size);
+        }
+        printf_log(LOG_NONE, " ------------\n");
+    }
+    // all done...
+    __clear_cache(p, p+sz);   // need to clear the cache before execution...
+    // keep size of instructions for signal handling
+    {
+        // NOTE(review): cap is computed from the arm size in bytes while addInst is given size/4,
+        // so the first allocation can only be larger than needed
+        size_t cap = 1;
+        for(int i=0; i<helper.size; ++i)
+            cap += 1 + ((helper.insts[i].x64.size>helper.insts[i].size)?helper.insts[i].x64.size:helper.insts[i].size)/15;
+        size_t size = 0;
+        block->instsize = (instsize_t*)calloc(cap, sizeof(instsize_t));
+        for(int i=0; i<helper.size; ++i)
+            block->instsize = addInst(block->instsize, &size, &cap, helper.insts[i].x64.size, helper.insts[i].size/4);
+        block->instsize = addInst(block->instsize, &size, &cap, 0, 0);    // add a "end of block" mark, just in case
+    }
+    // ok, free the helper now
+    free(helper.insts);
+    free(helper.next);
+    block->size = sz;
+    block->isize = helper.size;
+    block->block = p;
+    block->need_test = 0;
+    //block->x64_addr = (void*)start;
+    block->x64_size = end-start;
+    if(box64_dynarec_largest<block->x64_size)
+        box64_dynarec_largest = block->x64_size;
+    // block->x64_addr is not set in this function (assignment above is commented out);
+    // presumably set by the caller -- TODO confirm
+    block->hash = X31_hash_code(block->x64_addr, block->x64_size);
+    // fill sons if any
+    dynablock_t** sons = NULL;
+    int sons_size = 0;
+    if(helper.sons_size) {
+        sons = (dynablock_t**)calloc(helper.sons_size, sizeof(dynablock_t*));
+        for (int i=0; i<helper.sons_size; ++i) {
+            int created = 1;
+            dynablock_t *son = AddNewDynablock(block->parent, helper.sons_x64[i], &created);
+            if(created) {    // avoid breaking a working block!
+                son->block = helper.sons_arm[i];
+                son->x64_addr = (void*)helper.sons_x64[i];
+                son->x64_size = end-helper.sons_x64[i];
+                if(!son->x64_size) {printf_log(LOG_NONE, "Warning, son with null x64 size! (@%p / ARM=%p)", son->x64_addr, son->block);}
+                son->father = block;
+                son->done = 1;
+                sons[sons_size++] = son;
+                if(!son->parent)
+                    son->parent = block->parent;
+            }
+        }
+        // only keep the array if at least one son was actually created
+        if(sons_size) {
+            block->sons = sons;
+            block->sons_size = sons_size;
+        } else
+            free(sons);
+    }
+    free(helper.sons_x64);
+    free(helper.sons_arm);
+    block->done = 1;
+    return (void*)block;
+}
\ No newline at end of file
diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c
new file mode 100755
index 00000000..784739ac
--- /dev/null
+++ b/src/dynarec/dynarec_arm64_functions.c
@@ -0,0 +1,354 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <errno.h>
+#include <string.h>
+#include <math.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "tools/bridge_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "emu/x87emu_private.h"
+#include "x64trace.h"
+#include "signals.h"
+#include "dynarec_arm64.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+
+// Store ST0 as a 10-byte long double at p.
+void arm_fstp(x64emu_t* emu, void* p)
+{
+    if(ST0.q==STld(0).ref) {
+        // ST0 still matches the reference of the kept long double: copy the 10 bytes as they are
+        memcpy(p, &STld(0).ld, 10);
+        return;
+    }
+    // otherwise convert the double value
+    D2LD(&ST0.d, p);
+}
+
+// Debug helper: log register number n and its value reg (hexadecimal, then decimal).
+void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n)
+{
+    dynarec_log(LOG_DEBUG, "R%ld=0x%lx (%ld)\n", n, reg, reg);
+}
+
+// F2XM1 helper: ST0 = 2^ST0 - 1
+void arm_f2xm1(x64emu_t* emu)
+{
+    ST0.d = exp2(ST0.d) - 1.0;
+}
+// FYL2X helper: ST1 = ST1 * log2(ST0). The stack itself is not popped here.
+void arm_fyl2x(x64emu_t* emu)
+{
+    ST(1).d = log2(ST0.d)*ST(1).d;
+}
+// FPTAN helper: ST0 = tan(ST0). No value is pushed here.
+void arm_ftan(x64emu_t* emu)
+{
+    ST0.d = tan(ST0.d);
+}
+// FPATAN helper: ST1 = atan2(ST1, ST0). The stack itself is not popped here.
+void arm_fpatan(x64emu_t* emu)
+{
+    ST1.d = atan2(ST1.d, ST0.d);
+}
+// FXTRACT helper: split ST1 into exponent and significand.
+// The source value is read from ST1 -- presumably the caller already pushed the stack, TODO confirm.
+void arm_fxtract(x64emu_t* emu)
+{
+    // biased exponent: bits 52..62 of the double
+    int32_t tmp32s = (ST1.q&0x7ff0000000000000LL)>>52;
+    tmp32s -= 1023;     // remove the exponent bias
+    ST1.d /= exp2(tmp32s);  // ST1 keeps the value scaled by 2^-exponent
+    ST0.d = tmp32s;         // ST0 receives the unbiased exponent
+}
+// FPREM helper: ST0 = ST0 - ST1*Q, with Q the quotient ST0/ST1 truncated toward zero.
+// The 3 low bits of Q are reported in the x87 condition codes.
+void arm_fprem(x64emu_t* emu)
+{
+    int32_t tmp32s = ST0.d / ST1.d;
+    ST0.d -= ST1.d * tmp32s;
+    emu->sw.f.F87_C2 = 0;   // the reduction is always reported as complete
+    // x87 mapping of the quotient bits: C1 = Q0 (lowest bit), C3 = Q1, C0 = Q2
+    emu->sw.f.F87_C1 = (tmp32s&1);
+    emu->sw.f.F87_C3 = ((tmp32s>>1)&1);
+    emu->sw.f.F87_C0 = ((tmp32s>>2)&1);
+}
+// FYL2XP1 helper: ST1 = ST1 * log2(ST0 + 1). The stack itself is not popped here.
+void arm_fyl2xp1(x64emu_t* emu)
+{
+    ST(1).d = log2(ST0.d + 1.0)*ST(1).d;
+}
+// FSINCOS helper: the angle is read from ST1; the sine goes to ST1 and the cosine to ST0.
+// Presumably the caller already pushed the stack -- TODO confirm.
+void arm_fsincos(x64emu_t* emu)
+{
+    sincos(ST1.d, &ST1.d, &ST0.d);
+}
+// FRNDINT helper: ST0 = fpu_round(ST0)
+void arm_frndint(x64emu_t* emu)
+{
+    ST0.d = fpu_round(emu, ST0.d);
+}
+// FSCALE helper: ST0 = ST0 * 2^trunc(ST1)
+void arm_fscale(x64emu_t* emu)
+{
+    ST0.d *= exp2(trunc(ST1.d));
+}
+// FSIN helper: ST0 = sin(ST0)
+void arm_fsin(x64emu_t* emu)
+{
+    ST0.d = sin(ST0.d);
+}
+// FCOS helper: ST0 = cos(ST0)
+void arm_fcos(x64emu_t* emu)
+{
+    ST0.d = cos(ST0.d);
+}
+
+// FBLD helper: thin wrapper, forwards ed to fpu_fbld
+void arm_fbld(x64emu_t* emu, uint8_t* ed)
+{
+    fpu_fbld(emu, ed);
+}
+
+// FILD (64 bits) helper: load the 64-bit integer at ed in ST0.
+// The exact integer is also kept in STll(0), with ST0's bits as reference, so that
+// arm_fistp64 can store it back unchanged while ST0 is not modified.
+void arm_fild64(x64emu_t* emu, int64_t* ed)
+{
+    int64_t tmp;
+    memcpy(&tmp, ed, sizeof(tmp));  // memcpy: ed may not be aligned
+    ST0.d = tmp;
+    STll(0).ll = tmp;
+    STll(0).ref = ST0.q;
+}
+
+// FBSTP helper: thin wrapper, forwards ed to fpu_fbst. The stack itself is not popped here.
+void arm_fbstp(x64emu_t* emu, uint8_t* ed)
+{
+    fpu_fbst(emu, ed);
+}
+
+// FISTP (64 bits) helper: store ST0 as a 64-bit integer at ed.
+// The stack itself is not popped here.
+void arm_fistp64(x64emu_t* emu, int64_t* ed)
+{
+    // memcpy is used to avoid alignment issues
+    if(STll(0).ref==ST(0).q) {
+        // ST0 is unchanged since the exact integer was kept: store that integer directly
+        memcpy(ed, &STll(0).ll, sizeof(int64_t));
+    } else {
+        int64_t tmp;
+        // out of range or not finite: store 0x8000000000000000
+        if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d))
+            tmp = 0x8000000000000000LL;
+        else
+            tmp = fpu_round(emu, ST0.d);
+        memcpy(ed, &tmp, sizeof(tmp));
+    }
+}
+
+// FISTTP (64 bits) helper: store ST0, truncated toward zero by the C conversion, at ed.
+// NOTE(review): no range check is done before the conversion, unlike arm_fistp64.
+void arm_fistt64(x64emu_t* emu, int64_t* ed)
+{
+    // memcpy is used to avoid alignment issues
+    int64_t tmp = ST0.d;
+    memcpy(ed, &tmp, sizeof(tmp));
+}
+
+// FLD (80 bits) helper: load the 10-byte long double at ed in ST0.
+// The original 10 bytes are kept in STld(0), with ST0's bits as reference, so that
+// arm_fstp can store them back unchanged while ST0 is not modified.
+void arm_fld(x64emu_t* emu, uint8_t* ed)
+{
+    memcpy(&STld(0).ld, ed, 10);
+    LD2D(&STld(0), &ST(0).d);
+    STld(0).ref = ST0.q;
+}
+
+// Undefined instruction helper: emit a SIGILL signal with R_RIP as address
+void arm_ud(x64emu_t* emu)
+{
+    emit_signal(emu, SIGILL, (void*)R_RIP, 0);
+}
+
+// FSAVE helper: write the FPU environment, then the 8 x87 registers, to the memory at ed.
+// The registers are stored as 10-byte long doubles, starting 28 bytes after ed.
+void arm_fsave(x64emu_t* emu, uint8_t* ed)
+{
+    fpu_savenv(emu, (char*)ed, 0);
+
+    uint8_t* p = ed;
+    p += 28;
+    for (int i=0; i<8; ++i) {
+        // saving goes from register to memory: double -> long double (same direction as arm_fstp)
+        D2LD(&ST(i).d, p);
+        p+=10;
+    }
+}
+// FRSTOR helper: read the FPU environment, then the 8 x87 registers, from the memory at ed.
+// The registers are read as 10-byte long doubles, starting 28 bytes after ed.
+void arm_frstor(x64emu_t* emu, uint8_t* ed)
+{
+    fpu_loadenv(emu, (char*)ed, 0);
+
+    uint8_t* p = ed;
+    p += 28;
+    for (int i=0; i<8; ++i) {
+        // restoring goes from memory to register: long double -> double (same direction as arm_fld)
+        LD2D(p, &ST(i).d);
+        p+=10;
+    }
+
+}
+
+// FPREM1 helper: ST0 = ST0 - ST1*Q, with Q the quotient ST0/ST1 rounded to nearest.
+// The 3 low bits of Q are reported in the x87 condition codes.
+void arm_fprem1(x64emu_t* emu)
+{
+    // simplified version
+    int32_t tmp32s = round(ST0.d / ST1.d);
+    ST0.d -= ST1.d*tmp32s;
+    emu->sw.f.F87_C2 = 0;   // the reduction is always reported as complete
+    // x87 mapping of the quotient bits: C1 = Q0 (lowest bit), C3 = Q1, C0 = Q2
+    emu->sw.f.F87_C1 = (tmp32s&1);
+    emu->sw.f.F87_C3 = ((tmp32s>>1)&1);
+    emu->sw.f.F87_C0 = ((tmp32s>>2)&1);
+}
+
+
+// Get an FPU single scratch reg: returns the current scratch counter, then increments it
+int fpu_get_scratch_single(dynarec_arm_t* dyn)
+{
+    return dyn->fpu_scratch++;  // return an Sx
+}
+// Get an FPU double scratch reg
+int fpu_get_scratch_double(dynarec_arm_t* dyn)
+{
+    // the scratch counter is in single-reg units: round it up to an even value
+    int i = (dyn->fpu_scratch+1)&(~1);
+    dyn->fpu_scratch = i+2; // 2 single units are consumed
+    return i/2; // return a Dx
+}
+// Get an FPU quad scratch reg.
+// Once the scratch counter is above 4, a single "extra" quad register is taken from the
+// regular FPU registers instead (and a warning is logged if that one is already in use).
+int fpu_get_scratch_quad(dynarec_arm_t* dyn)
+{
+    if(dyn->fpu_scratch<=4) {
+        // round the counter (single-reg units) up to a multiple of 4, then consume 4 units
+        int base = (dyn->fpu_scratch+3)&(~3);
+        dyn->fpu_scratch = base+4;
+        return base/2; // Return a Dx, not a Qx
+    }
+    if(!dyn->fpu_extra_qscratch) {
+        dyn->fpu_extra_qscratch = fpu_get_reg_quad(dyn);
+    } else {
+        dynarec_log(LOG_NONE, "Warning, Extra QScratch slot taken and need another one!\n");
+    }
+    return dyn->fpu_extra_qscratch;
+}
+// Reset scratch regs counter, and release the extra quad scratch register if one was taken
+void fpu_reset_scratch(dynarec_arm_t* dyn)
+{
+    dyn->fpu_scratch = 0;
+    if(!dyn->fpu_extra_qscratch)
+        return;
+    fpu_free_reg_quad(dyn, dyn->fpu_extra_qscratch);
+    dyn->fpu_extra_qscratch = 0;
+}
+#define FPUFIRST    8
+// Get an FPU double reg: take the first free slot of fpuused
+int fpu_get_reg_double(dynarec_arm_t* dyn)
+{
+    // TODO: check upper limit?
+    int slot;
+    for(slot=0; dyn->fpuused[slot]; ++slot)
+        ;   // skip the slots already in use
+    dyn->fpuused[slot] = 1;
+    return FPUFIRST+slot; // return a Dx
+}
+// Free an FPU double reg (reg is a value previously returned by fpu_get_reg_double)
+void fpu_free_reg_double(dynarec_arm_t* dyn, int reg)
+{
+    // TODO: check upper limit?
+    int i=reg-FPUFIRST;     // back to an index in fpuused
+    dyn->fpuused[i] = 0;
+}
+// Get an FPU quad reg: take the first free pair of slots of fpuused, starting on an even index
+int fpu_get_reg_quad(dynarec_arm_t* dyn)
+{
+    int slot;
+    for(slot=0; dyn->fpuused[slot] || dyn->fpuused[slot+1]; slot+=2)
+        ;   // skip the pairs with at least one slot in use
+    dyn->fpuused[slot] = 1;
+    dyn->fpuused[slot+1] = 1;
+    return FPUFIRST+slot; // Return a Dx, not a Qx
+}
+// Free an FPU quad reg (reg is a value previously returned by fpu_get_reg_quad)
+void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg)
+{
+    int i=reg-FPUFIRST;     // back to an index in fpuused
+    dyn->fpuused[i] = dyn->fpuused[i+1] = 0;    // both slots of the pair are released
+}
+// Reset fpu regs counter and mark the 24 fpuused slots as free
+void fpu_reset_reg(dynarec_arm_t* dyn)
+{
+    dyn->fpu_reg = 0;
+    memset(dyn->fpuused, 0, 24*sizeof(dyn->fpuused[0]));
+}
+
+#define F8      *(uint8_t*)(addr++)
+#define F32     *(uint32_t*)(addr+=4, addr-4)
+// Get if ED will have the correct parity. Not emitting anything. Parity is 2 for DWORD or 3 for QWORD
+// Returns 1 when the low "parity" bits of the effective address are known to be 0, 0 otherwise.
+// addr points just after the ModRM byte (nextop).
+int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity)
+{
+
+    uint32_t tested = (1<<parity)-1;    // mask of the low bits that must be 0
+    if((nextop&0xC0)==0xC0)
+        return 0;   // direct register, no parity...
+    if(!(nextop&0xC0)) {
+        if((nextop&7)==4) {
+            // SIB byte follows
+            uint8_t sib = F8;
+            int sib_reg = (sib>>3)&7;
+            if((sib&0x7)==5) {
+                // no base register: 32-bit constant (+ scaled index)
+                uint32_t tmp = F32;
+                if (sib_reg!=4) {
+                    // if XXXXXX+reg<<N then check parity of XXXXX and N should be enough
+                    return ((tmp&tested)==0 && (sib>>6)>=parity)?1:0;
+                } else {
+                    // just a constant...
+                    return (tmp&tested)?0:1;
+                }
+            } else {
+                if(sib_reg==4 && parity<3)
+                    return 0;   // simple [reg]
+                // don't try [reg1 + reg2<<N], unless reg1 is ESP
+                return ((sib&0x7)==4 && (sib>>6)>=parity)?1:0;
+            }
+        } else if((nextop&7)==5) {
+            // NOTE(review): only the 32-bit constant is tested here; in 64-bit mode this form is
+            // RIP-relative, so the address of the next instruction would matter too -- confirm
+            uint32_t tmp = F32;
+            return (tmp&tested)?0:1;
+        } else {
+            return 0;
+        }
+    } else {
+        return 0; //Form [reg1 + reg2<<N + XXXXXX]
+    }
+}
+
+// Do the GETED, but don't emit anything...
+// addr points just after the ModRM byte (nextop); the returned address is just after the
+// SIB byte and displacement (if any) that belong to this ModRM.
+uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop) 
+{
+    const int mod = nextop>>6;
+    const int rm = nextop&7;
+    switch(mod) {
+        case 3: // direct register: nothing follows
+            return addr;
+        case 0: // no displacement, except for the 2 special forms using a 32-bit constant
+            if(rm==4) {
+                uint8_t sib = F8;
+                if((sib&0x7)==5)
+                    addr+=4;
+            } else if(rm==5) {
+                addr+=4;
+            }
+            return addr;
+        default: // mod 1: 8-bit displacement, mod 2: 32-bit displacement
+            if(rm==4)
+                ++addr; // SIB byte
+            addr += (mod==2)?4:1;
+            return addr;
+    }
+}
+#undef F8
+#undef F32
+
+// Check if addr points to a bridge to a native function (possibly through a "jmp [rip+disp32]").
+// Returns 1 if it does, 0 otherwise. On success, when the pointers are not NULL:
+//  *calladdress receives the address just after the leading 0xCC of the bridge
+//  *retn receives the N field of the bridge when its C3 field is 0xC2, 0 otherwise
+int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn)
+{
+#define PK(a)       *(uint8_t*)(addr+a)
+#define PK32(a)     *(int32_t*)(addr+a)
+
+    if(!addr)
+        return 0;
+    if(PK(0)==0xff && PK(1)==0x25) {  // jmp [rip+disp32], maybe the GOT
+        // in 64-bit mode this form is RIP-relative: the slot is at (end of the 6-byte instruction)+disp32
+        uintptr_t a1 = addr + 6 + PK32(2);   // need to add a check to see if the address is from the GOT !
+        addr = *(uintptr_t*)a1; 
+    }
+    if(addr<0x10000)    // too low, that is suspicious
+        return 0;
+    onebridge_t *b = (onebridge_t*)(addr);
+    // a bridge starts with 0xCC 'S' 'C', has a wrapper and is not the PLT resolver
+    if(b->CC==0xCC && b->S=='S' && b->C=='C' && b->w!=(wrapper_t)0 && b->f!=(uintptr_t)PltResolver) {
+        // found !
+        if(retn) *retn = (b->C3==0xC2)?b->N:0;
+        if(calladdress) *calladdress = addr+1;
+        return 1;
+    }
+    return 0;
+#undef PK32
+#undef PK
+}
+
diff --git a/src/dynarec/dynarec_arm64_functions.h b/src/dynarec/dynarec_arm64_functions.h
new file mode 100755
index 00000000..d932aa4d
--- /dev/null
+++ b/src/dynarec/dynarec_arm64_functions.h
@@ -0,0 +1,61 @@
+#ifndef __DYNAREC_ARM_FUNCTIONS_H__
+#define __DYNAREC_ARM_FUNCTIONS_H__
+
+typedef struct x64emu_s x64emu_t;
+
+void arm_fstp(x64emu_t* emu, void* p);
+
+void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n);
+
+void arm_f2xm1(x64emu_t* emu);
+void arm_fyl2x(x64emu_t* emu);
+void arm_ftan(x64emu_t* emu);
+void arm_fpatan(x64emu_t* emu);
+void arm_fxtract(x64emu_t* emu);
+void arm_fprem(x64emu_t* emu);
+void arm_fyl2xp1(x64emu_t* emu);
+void arm_fsincos(x64emu_t* emu);
+void arm_frndint(x64emu_t* emu);
+void arm_fscale(x64emu_t* emu);
+void arm_fsin(x64emu_t* emu);
+void arm_fcos(x64emu_t* emu);
+void arm_fbld(x64emu_t* emu, uint8_t* ed);
+void arm_fild64(x64emu_t* emu, int64_t* ed);
+void arm_fbstp(x64emu_t* emu, uint8_t* ed);
+void arm_fistp64(x64emu_t* emu, int64_t* ed);
+void arm_fistt64(x64emu_t* emu, int64_t* ed);
+void arm_fld(x64emu_t* emu, uint8_t* ed);
+void arm_fsave(x64emu_t* emu, uint8_t* ed);
+void arm_frstor(x64emu_t* emu, uint8_t* ed);
+void arm_fprem1(x64emu_t* emu);
+
+void arm_ud(x64emu_t* emu);
+
+// Get an FPU single scratch reg
+int fpu_get_scratch_single(dynarec_arm_t* dyn);
+// Get an FPU double scratch reg
+int fpu_get_scratch_double(dynarec_arm_t* dyn);
+// Get an FPU quad scratch reg
+int fpu_get_scratch_quad(dynarec_arm_t* dyn);
+// Reset scratch regs counter
+void fpu_reset_scratch(dynarec_arm_t* dyn);
+// Get an FPU double reg
+int fpu_get_reg_double(dynarec_arm_t* dyn);
+// Free a FPU double reg
+void fpu_free_reg_double(dynarec_arm_t* dyn, int reg);
+// Get an FPU quad reg
+int fpu_get_reg_quad(dynarec_arm_t* dyn);
+// Free a FPU quad reg
+void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg);
+// Reset fpu regs counter
+void fpu_reset_reg(dynarec_arm_t* dyn);
+
+// Get if ED will have the correct parity. Not emiting anything. Parity is 2 for DWORD or 3 for QWORD
+int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity);
+// Do the GETED, but don't emit anything...
+uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop);
+
+// Is what pointed at addr a native call? And if yes, to what function?
+int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn);
+
+#endif //__DYNAREC_ARM_FUNCTIONS_H__
\ No newline at end of file
diff --git a/src/dynarec/dynarec_arm64_private.h b/src/dynarec/dynarec_arm64_private.h
new file mode 100755
index 00000000..8101cc2b
--- /dev/null
+++ b/src/dynarec/dynarec_arm64_private.h
@@ -0,0 +1,56 @@
+#ifndef __DYNAREC_ARM_PRIVATE_H_
+#define __DYNAREC_ARM_PRIVATE_H_
+
+#include "dynarec_private.h"
+
+typedef struct x64emu_s x64emu_t;
+
+// Per-instruction data of the dynarec: the x64 instruction and what is emitted for it
+typedef struct instruction_arm_s {
+    instruction_x64_t   x64;
+    uintptr_t           address;    // (start) address of the arm emitted instruction
+    uintptr_t           epilog;     // epilog of current instruction (can be start of next, or barrier stuff)
+    int                 size;       // size of the arm emitted instruction
+    int                 size2;      // size of the arm emitted instruction after pass2
+    uintptr_t           mark, mark2, mark3;
+    uintptr_t           markf;
+    uintptr_t           markseg;
+    uintptr_t           marklock;
+    int                 pass2choice;// value for choices that are fixed on pass2 for pass3
+    uintptr_t           natcall;
+    int                 retn;
+} instruction_arm64_t;
+
+// Working state ("helper") used while one dynarec block is being built
+typedef struct dynarec_arm_s {
+    instruction_arm64_t *insts;     // array of instructions, allocated with "cap" entries
+    int32_t             size;       // number of instructions
+    int32_t             cap;        // capacity of insts
+    uintptr_t           start;      // start of the block
+    uint32_t            isize;      // size in bytes of x64 instructions included
+    void*               block;      // memory pointer where next instruction is emitted
+    uintptr_t           arm_start;  // start of the arm code
+    int                 arm_size;   // size of emitted arm code
+    int                 state_flags;// actual state for on-demand flags
+    int                 x87cache[8];// cache status for the 8 x87 registers behind the fpu stack
+    int                 x87reg[8];  // reg used for x87cache entry
+    int                 mmxcache[8];// cache status for the 8 MMX registers
+    int                 ssecache[8];// cache status for the 8 SSE(2) registers
+    int                 fpuused[24];// all 8..31 double reg from fpu, used by x87, sse and mmx
+    int                 x87stack;   // cache stack counter
+    int                 fpu_scratch;// scratch counter
+    int                 fpu_extra_qscratch; // some opcodes need an extra quad scratch register
+    int                 fpu_reg;    // x87/sse/mmx reg counter
+    int                 dfnone;     // if deferred flags is already set to df_none
+    uintptr_t*          next;       // variable array of "next" jump address
+    int                 next_sz;    // number of entries used in next
+    int                 next_cap;   // number of entries allocated in next
+    uintptr_t*          sons_x64;   // the x64 address of potential dynablock sons
+    void**              sons_arm;   // the arm address of potential dynablock sons
+    int                 sons_size;  // number of potential dynablock sons
+} dynarec_arm_t;
+
+void add_next(dynarec_arm_t *dyn, uintptr_t addr);
+uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr);
+int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n);
+int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n);
+
+#endif //__DYNAREC_ARM_PRIVATE_H_
\ No newline at end of file
diff --git a/src/dynarec/dynarec_private.h b/src/dynarec/dynarec_private.h
new file mode 100755
index 00000000..66eb3bfa
--- /dev/null
+++ b/src/dynarec/dynarec_private.h
@@ -0,0 +1,35 @@
+#ifndef __DYNAREC_PRIVATE_H_
+#define __DYNAREC_PRIVATE_H_
+
+// all flags for the use_flags field
+#define X_CF    (1<<0)
+#define X_PF    (1<<1)
+#define X_AF    (1<<2)
+#define X_ZF    (1<<3)
+#define X_SF    (1<<4)
+#define X_OF    (1<<5)
+#define X_ALL   ((1<<6)-1)
+#define X_PEND  (0x1000)
+
+// all state flags
+#define SF_UNKNOWN  0
+#define SF_SET      1
+#define SF_PENDING  2
+#define SF_MAYSET   3
+#define SF_SUBSET   4
+
+// Description of one x64 instruction, as gathered by the dynarec passes
+typedef struct instruction_x64_s {
+    uintptr_t   addr;   // address of the instruction
+    int32_t     size;   // size of the instruction
+    int         barrier; // set to 1 on an instruction that is the target of a jump from inside the block, so no optim allowed
+    uintptr_t   jmp;    // address to jump to, even if conditional (0 if not), no relative offset here
+    int         jmp_insts;  // instruction to jump to (-1 if out of the block)
+    uint32_t    use_flags;  // 0 or combination of X_?F
+    uint32_t    set_flags;  // 0 or combination of X_?F
+    uint32_t    need_flags; // calculated
+    int         state_flags; // One of SF_XXX state
+} instruction_x64_t;
+
+void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name);
+
+#endif //__DYNAREC_PRIVATE_H_