about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2021-03-14 17:58:04 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2021-03-14 17:58:04 +0100
commit 4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6 (patch)
tree e89e9892fa166aa348b8c9f902de7428e875c7bc /src
parent 3dda84e58b148f92b2bb4d94caacc84011fa3919 (diff)
download box64-4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6.tar.gz
box64-4919f161cc7a0cfa31f91b0d1e2d0ff600044ff6.zip
[DYNAREC] Added Basic blocks for dynarec
Diffstat (limited to 'src')
-rw-r--r-- src/custommem.c 869
-rwxr-xr-x src/dynarec/arm64_epilog.S 81
-rwxr-xr-x src/dynarec/arm64_lock_helper.S 87
-rwxr-xr-x src/dynarec/arm64_lock_helper.h 31
-rwxr-xr-x src/dynarec/arm64_next.S 47
-rwxr-xr-x src/dynarec/arm64_printer.c 14
-rw-r--r-- src/dynarec/arm64_printer.h 6
-rwxr-xr-x src/dynarec/arm64_prolog.S 53
-rwxr-xr-x src/dynarec/dynablock.c 422
-rwxr-xr-x src/dynarec/dynablock_private.h 36
-rwxr-xr-x src/dynarec/dynarec.c 38
-rwxr-xr-x src/dynarec/dynarec_arm64.c 449
-rwxr-xr-x src/dynarec/dynarec_arm64_functions.c 354
-rwxr-xr-x src/dynarec/dynarec_arm64_functions.h 61
-rwxr-xr-x src/dynarec/dynarec_arm64_private.h 56
-rwxr-xr-x src/dynarec/dynarec_private.h 35
-rwxr-xr-x src/elfs/elfloader.c 8
-rwxr-xr-x src/emu/x64emu_private.h 5
-rwxr-xr-x src/emu/x64run.c 38
-rw-r--r-- src/emu/x64run0f.c 2
-rw-r--r-- src/emu/x64run64.c 4
-rw-r--r-- src/emu/x64run66.c 3
-rw-r--r-- src/emu/x64run660f.c 4
-rw-r--r-- src/emu/x64run6664.c 4
-rw-r--r-- src/emu/x64run66d9.c 5
-rw-r--r-- src/emu/x64run66dd.c 5
-rw-r--r-- src/emu/x64run67.c 4
-rw-r--r-- src/emu/x64rund8.c 3
-rw-r--r-- src/emu/x64rund9.c 5
-rw-r--r-- src/emu/x64runda.c 3
-rw-r--r-- src/emu/x64rundb.c 5
-rw-r--r-- src/emu/x64rundd.c 3
-rw-r--r-- src/emu/x64rundf.c 5
-rw-r--r-- src/emu/x64runf0.c 176
-rw-r--r-- src/emu/x64runf20f.c 4
-rw-r--r-- src/emu/x64runf30f.c 5
-rwxr-xr-x src/include/box64context.h 2
-rw-r--r-- src/include/custommem.h 8
-rwxr-xr-x src/include/debug.h 7
-rwxr-xr-x src/include/dynablock.h 31
-rwxr-xr-x src/include/dynarec_arm64.h 9
-rwxr-xr-x src/libtools/signals.c 109
-rwxr-xr-x src/libtools/threads.c 2
-rwxr-xr-x src/main.c 87
-rwxr-xr-x src/tools/bridge.c 5
45 files changed, 2576 insertions, 614 deletions
diff --git a/src/custommem.c b/src/custommem.c
index 8febe5a0..62f15664 100644
--- a/src/custommem.c
+++ b/src/custommem.c
@@ -23,19 +23,21 @@
 #include "khash.h"
 #ifdef DYNAREC
 #include "dynablock.h"
-#include "dynarec/arm_lock_helper.h"
+#include "dynarec/arm64_lock_helper.h"
 
-#define USE_MMAP
+//#define USE_MMAP
 
 // init inside dynablocks.c
 KHASH_MAP_INIT_INT64(dynablocks, dynablock_t*)
-static dynablocklist_t*    dynmap[DYNAMAP_SIZE];     // 4G of memory mapped by 4K block
+static dynablocklist_t***  dynmap123[1<<DYNAMAP_SHIFT]; // 64bits.. in 4x16bits array
 static pthread_mutex_t     mutex_mmap;
 static mmaplist_t          *mmaplist;
 static int                 mmapsize;
 static kh_dynablocks_t     *dblist_oversized;      // store the list of oversized dynablocks (normal sized are inside mmaplist)
-static uintptr_t           *box64_jumptable[JMPTABL_SIZE];
-static uintptr_t           box64_jmptbl_default[1<<JMPTABL_SHIFT];
+static uintptr_t***        box64_jmptbl3[1<<JMPTABL_SHIFT];
+static uintptr_t**         box64_jmptbldefault2[1<<JMPTABL_SHIFT];
+static uintptr_t*          box64_jmptbldefault1[1<<JMPTABL_SHIFT];
+static uintptr_t           box64_jmptbldefault0[1<<JMPTABL_SHIFT];
 #endif
 #define MEMPROT_SHIFT 12
 #define MEMPROT_SIZE (1<<(32-MEMPROT_SHIFT))
@@ -284,349 +286,433 @@ void customFree(void* p)
 }
 
 #ifdef DYNAREC
-//typedef struct mmaplist_s {
-//    void*               block;
-//    int                 maxfree;
-//    size_t              size;
-//    kh_dynablocks_t*    dblist;
-//    uint8_t*            helper;
-//} mmaplist_t;
-
-//uintptr_t FindFreeDynarecMap(dynablock_t* db, int size)
-//{
-//    // look for free space
-//    void* sub = NULL;
-//    for(int i=0; i<mmapsize; ++i) {
-//        if(mmaplist[i].maxfree>=size) {
-//            int rsize = 0;
-//            sub = getFirstBlock(mmaplist[i].block, size, &rsize);
-//            if(sub) {
-//                uintptr_t ret = (uintptr_t)allocBlock(mmaplist[i].block, sub, size);
-//                if(rsize==mmaplist[i].maxfree)
-//                    mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size);
-//                kh_dynablocks_t *blocks = mmaplist[i].dblist;
-//                if(!blocks) {
-//                    blocks = mmaplist[i].dblist = kh_init(dynablocks);
-//                    kh_resize(dynablocks, blocks, 64);
-//                }
-//                khint_t k;
-//                int r;
-//                k = kh_put(dynablocks, blocks, (uintptr_t)ret, &r);
-//                kh_value(blocks, k) = db;
-//                for(int j=0; j<size; ++j)
-//                    mmaplist[i].helper[(uintptr_t)ret-(uintptr_t)mmaplist[i].block+j] = (j<256)?j:255;
-//                return ret;
-//            }
-//        }
-//    }
-//    return 0;
-//}
-
-//uintptr_t AddNewDynarecMap(dynablock_t* db, int size)
-//{
-//    int i = mmapsize++;    // yeah, useful post incrementation
-//    dynarec_log(LOG_DEBUG, "Ask for DynaRec Block Alloc #%d\n", mmapsize);
-//    mmaplist = (mmaplist_t*)realloc(mmaplist, mmapsize*sizeof(mmaplist_t));
-//    #ifndef USE_MMAP
-//    void *p = NULL;
-//    if(posix_memalign(&p, box64_pagesize, MMAPSIZE)) {
-//        dynarec_log(LOG_INFO, "Cannot create memory map of %d byte for dynarec block #%d\n", MMAPSIZE, i);
-//        --mmapsize;
-//        return 0;
-//    }
-//    mprotect(p, MMAPSIZE, PROT_READ | PROT_WRITE | PROT_EXEC);
-//    #else
-//    void* p = mmap(NULL, MMAPSIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
-//    if(p==(void*)-1) {
-//        dynarec_log(LOG_INFO, "Cannot create memory map of %d byte for dynarec block #%d\n", MMAPSIZE, i);
-//        --mmapsize;
-//        return 0;
-//    }
-//    #endif
-//    setProtection((uintptr_t)p, MMAPSIZE, PROT_READ | PROT_WRITE | PROT_EXEC);
-//
-//    mmaplist[i].block = p;
-//    mmaplist[i].size = MMAPSIZE;
-//    mmaplist[i].helper = (uint8_t*)calloc(1, MMAPSIZE);
-//    // setup marks
-//    blockmark_t* m = (blockmark_t*)p;
-//    m->prev.x32 = 0;
-//    m->next.fill = 0;
-//    m->next.size = MMAPSIZE-sizeof(blockmark_t);
-//    m = (blockmark_t*)(p+MMAPSIZE-sizeof(blockmark_t));
-//    m->next.x32 = 0;
-//    m->prev.fill = 0;
-//    m->prev.size = MMAPSIZE-sizeof(blockmark_t);
-//    // alloc 1st block
-//    uintptr_t sub  = (uintptr_t)allocBlock(mmaplist[i].block, p, size);
-//    mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size);
-//    kh_dynablocks_t *blocks = mmaplist[i].dblist = kh_init(dynablocks);
-//    kh_resize(dynablocks, blocks, 64);
-//    khint_t k;
-//    int ret;
-//    k = kh_put(dynablocks, blocks, (uintptr_t)sub, &ret);
-//    kh_value(blocks, k) = db;
-//    for(int j=0; j<size; ++j)
-//        mmaplist[i].helper[(uintptr_t)sub-(uintptr_t)mmaplist[i].block + j] = (j<256)?j:255;
-//    return sub;
-//}
-
-//void ActuallyFreeDynarecMap(dynablock_t* db, uintptr_t addr, int size)
-//{
-//    if(!addr || !size)
-//        return;
-//    for(int i=0; i<mmapsize; ++i) {
-//        if ((addr>(uintptr_t)mmaplist[i].block) 
-//         && (addr<((uintptr_t)mmaplist[i].block+mmaplist[i].size))) {
-//            void* sub = (void*)(addr-sizeof(blockmark_t));
-//            freeBlock(mmaplist[i].block, sub);
-//            mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size);
-//            kh_dynablocks_t *blocks = mmaplist[i].dblist;
-//            if(blocks) {
-//                khint_t k = kh_get(dynablocks, blocks, (uintptr_t)sub);
-//                if(k!=kh_end(blocks))
-//                    kh_del(dynablocks, blocks, k);
-//                for(int j=0; j<size; ++j)
-//                    mmaplist[i].helper[(uintptr_t)sub-(uintptr_t)mmaplist[i].block+j] = 0;
-//            }
-//            return;
-//        }
-//    }
-//    if(mmapsize)
-//        dynarec_log(LOG_NONE, "Warning, block %p (size %d) not found in mmaplist for Free\n", (void*)addr, size);
-//}
-
-//dynablock_t* FindDynablockFromNativeAddress(void* addr)
-//{
-//    // look in actual list
-//    for(int i=0; i<mmapsize; ++i) {
-//        if ((uintptr_t)addr>=(uintptr_t)mmaplist[i].block 
-//        && ((uintptr_t)addr<(uintptr_t)mmaplist[i].block+mmaplist[i].size)) {
-//            if(!mmaplist[i].helper)
-//                return FindDynablockDynablocklist(addr, mmaplist[i].dblist);
-//            else {
-//                uintptr_t p = (uintptr_t)addr - (uintptr_t)mmaplist[i].block;
-//                while(mmaplist[i].helper[p]) p -= mmaplist[i].helper[p];
-//                khint_t k = kh_get(dynablocks, mmaplist[i].dblist, (uintptr_t)mmaplist[i].block + p);
-//                if(k!=kh_end(mmaplist[i].dblist))
-//                    return kh_value(mmaplist[i].dblist, k);
-//                return NULL;
-//            }
-//        }
-//    }
-//    // look in oversized
-//    return FindDynablockDynablocklist(addr, dblist_oversized);
-//}
-
-//uintptr_t AllocDynarecMap(dynablock_t* db, int size)
-//{
-//    if(!size)
-//        return 0;
-//    if(size>MMAPSIZE-2*sizeof(blockmark_t)) {
-//        #ifndef USE_MMAP
-//        void *p = NULL;
-//        if(posix_memalign(&p, box64_pagesize, size)) {
-//            dynarec_log(LOG_INFO, "Cannot create dynamic map of %d bytes\n", size);
-//            return 0;
-//        }
-//        mprotect(p, size, PROT_READ | PROT_WRITE | PROT_EXEC);
-//        #else
-//        void* p = mmap(NULL, size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
-//        if(p==(void*)-1) {
-//            dynarec_log(LOG_INFO, "Cannot create dynamic map of %d bytes\n", size);
-//            return 0;
-//        }
-//        #endif
-//        setProtection((uintptr_t)p, size, PROT_READ | PROT_WRITE | PROT_EXEC);
-//        kh_dynablocks_t *blocks = dblist_oversized;
-//        if(!blocks) {
-//            blocks = dblist_oversized = kh_init(dynablocks);
-//            kh_resize(dynablocks, blocks, 64);
-//        }
-//        khint_t k;
-//        int ret;
-//        k = kh_put(dynablocks, blocks, (uintptr_t)p, &ret);
-//        kh_value(blocks, k) = db;
-//        return (uintptr_t)p;
-//    }
-//    
-//    if(pthread_mutex_trylock(&mutex_mmap)) {
-//        sched_yield();  // give it a chance
-//        if(pthread_mutex_trylock(&mutex_mmap))
-//            return 0;   // cannot lock, baillout
-//    }
-//
-//    uintptr_t ret = FindFreeDynarecMap(db, size);
-//    if(!ret)
-//        ret = AddNewDynarecMap(db, size);
-//
-//    pthread_mutex_unlock(&mutex_mmap);
-//
-//    return ret;
-//}
-
-//void FreeDynarecMap(dynablock_t* db, uintptr_t addr, uint32_t size)
-//{
-//    if(size>MMAPSIZE-2*sizeof(blockmark_t)) {
-//        #ifndef USE_MMAP
-//        free((void*)addr);
-//        #else
-//        munmap((void*)addr, size);
-//        #endif
-//        kh_dynablocks_t *blocks = dblist_oversized;
-//        if(blocks) {
-//            khint_t k = kh_get(dynablocks, blocks, addr);
-//            if(k!=kh_end(blocks))
-//                kh_del(dynablocks, blocks, k);
-//        }
-//        return;
-//    }
-//    pthread_mutex_lock(&mutex_mmap);
-//    ActuallyFreeDynarecMap(db, addr, size);
-//    pthread_mutex_unlock(&mutex_mmap);
-//}
-
-//dynablocklist_t* getDB(uintptr_t idx)
-//{
-//    return dynmap[idx];
-//}
+// Bookkeeping for one arena of dynarec-generated code; AddNewDynarecMap()
+// appends one of these to mmaplist for every arena it creates.
+typedef struct mmaplist_s {
+    void*               block;      // start of the arena, as handed to allocBlock()/freeBlock()
+    int                 maxfree;    // cached getMaxFreeBlock() result, used to skip arenas that cannot fit a request
+    size_t              size;       // arena size in bytes (MMAPSIZE when created by AddNewDynarecMap)
+    kh_dynablocks_t*    dblist;     // hash: address returned by allocBlock() -> owning dynablock_t*
+    uint8_t*            helper;     // one byte per arena byte: step back toward the allocation start (min(offset,255)); 0 at the start and in unused space
+} mmaplist_t;
+
+// Try to carve `size` bytes for `db` out of an arena that already exists.
+// Returns the address handed back by allocBlock(), or 0 when no arena can
+// host the request.
+uintptr_t FindFreeDynarecMap(dynablock_t* db, int size)
+{
+    for(int idx=0; idx<mmapsize; ++idx) {
+        mmaplist_t* const arena = &mmaplist[idx];
+        // cheap rejection on the cached largest free span
+        if(arena->maxfree<size)
+            continue;
+        int holesize = 0;
+        void* const hole = getFirstBlock(arena->block, size, &holesize);
+        if(!hole)
+            continue;
+        const uintptr_t ret = (uintptr_t)allocBlock(arena->block, hole, size);
+        // the span just used was as large as the cached maximum: recompute it
+        if(holesize==arena->maxfree)
+            arena->maxfree = getMaxFreeBlock(arena->block, arena->size);
+        // create the address -> dynablock hash on first use
+        if(!arena->dblist) {
+            arena->dblist = kh_init(dynablocks);
+            kh_resize(dynablocks, arena->dblist, 64);
+        }
+        int absent;
+        const khint_t slot = kh_put(dynablocks, arena->dblist, ret, &absent);
+        kh_value(arena->dblist, slot) = db;
+        // helper byte j holds the distance back toward `ret`, saturating at 255
+        uint8_t* const back = arena->helper + (ret-(uintptr_t)arena->block);
+        for(int j=0; j<size; ++j)
+            back[j] = (j<256)?j:255;
+        return ret;
+    }
+    return 0;
+}
+
+// Create a fresh MMAPSIZE arena for generated code, append it to mmaplist and
+// carve the first `size` bytes out of it for `db`.
+// Returns the address handed back by allocBlock(), or 0 when an allocation
+// fails; in that case mmapsize is left unchanged so the list only ever
+// describes fully initialised arenas.
+uintptr_t AddNewDynarecMap(dynablock_t* db, int size)
+{
+    const int i = mmapsize;
+    dynarec_log(LOG_DEBUG, "Ask for DynaRec Block Alloc #%d\n", i+1);
+    // grow through a temporary: a failed realloc must not lose the existing list
+    mmaplist_t* grown = (mmaplist_t*)realloc(mmaplist, (i+1)*sizeof(mmaplist_t));
+    if(!grown) {
+        dynarec_log(LOG_INFO, "Cannot grow the dynarec block list for block #%d\n", i);
+        return 0;
+    }
+    mmaplist = grown;
+    #ifndef USE_MMAP
+    void *p = NULL;
+    if(posix_memalign(&p, box64_pagesize, MMAPSIZE)) {
+        dynarec_log(LOG_INFO, "Cannot create memory map of %d byte for dynarec block #%d\n", MMAPSIZE, i);
+        return 0;
+    }
+    mprotect(p, MMAPSIZE, PROT_READ | PROT_WRITE | PROT_EXEC);
+    #else
+    void* p = mmap(NULL, MMAPSIZE, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_PRIVATE|MAP_ANONYMOUS, -1, 0);
+    if(p==(void*)-1) {
+        dynarec_log(LOG_INFO, "Cannot create memory map of %d byte for dynarec block #%d\n", MMAPSIZE, i);
+        return 0;
+    }
+    #endif
+    // the helper table is written unconditionally below, so it must exist
+    uint8_t* helper = (uint8_t*)calloc(1, MMAPSIZE);
+    if(!helper) {
+        dynarec_log(LOG_INFO, "Cannot allocate helper table for dynarec block #%d\n", i);
+        #ifndef USE_MMAP
+        free(p);
+        #else
+        munmap(p, MMAPSIZE);
+        #endif
+        return 0;
+    }
+    setProtection((uintptr_t)p, MMAPSIZE, PROT_READ | PROT_WRITE | PROT_EXEC);
+
+    mmaplist[i].block = p;
+    mmaplist[i].size = MMAPSIZE;
+    mmaplist[i].helper = helper;
+    // setup marks (one at each end of the arena)
+    blockmark_t* m = (blockmark_t*)p;
+    m->prev.x32 = 0;
+    m->next.fill = 0;
+    m->next.size = MMAPSIZE-sizeof(blockmark_t);
+    // byte arithmetic through char*: void* arithmetic is a GNU extension
+    m = (blockmark_t*)((char*)p+MMAPSIZE-sizeof(blockmark_t));
+    m->next.x32 = 0;
+    m->prev.fill = 0;
+    m->prev.size = MMAPSIZE-sizeof(blockmark_t);
+    // alloc 1st block
+    uintptr_t sub  = (uintptr_t)allocBlock(mmaplist[i].block, p, size);
+    mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size);
+    kh_dynablocks_t *blocks = mmaplist[i].dblist = kh_init(dynablocks);
+    kh_resize(dynablocks, blocks, 64);
+    khint_t k;
+    int ret;
+    k = kh_put(dynablocks, blocks, (uintptr_t)sub, &ret);
+    kh_value(blocks, k) = db;
+    // helper byte j holds the distance back toward `sub`, saturating at 255
+    for(int j=0; j<size; ++j)
+        mmaplist[i].helper[(uintptr_t)sub-(uintptr_t)mmaplist[i].block + j] = (j<256)?j:255;
+    // publish the arena only once every field is filled in
+    mmapsize = i+1;
+    return sub;
+}
+
+// Give the `size` bytes at `addr` back to the arena that contains them and
+// drop the matching bookkeeping (hash entry and helper bytes).
+// `addr` is expected to be the value previously returned by
+// FindFreeDynarecMap()/AddNewDynarecMap(); `db` is not used here.
+// FreeDynarecMap() calls this with mutex_mmap held.
+void ActuallyFreeDynarecMap(dynablock_t* db, uintptr_t addr, int size)
+{
+    if(!addr || !size)
+        return;
+    for(int i=0; i<mmapsize; ++i) {
+        if ((addr>(uintptr_t)mmaplist[i].block)
+         && (addr<((uintptr_t)mmaplist[i].block+mmaplist[i].size))) {
+            // freeBlock() is handed the address sizeof(blockmark_t) before the
+            // allocation (presumably the mark that precedes it)
+            void* sub = (void*)(addr-sizeof(blockmark_t));
+            freeBlock(mmaplist[i].block, sub);
+            mmaplist[i].maxfree = getMaxFreeBlock(mmaplist[i].block, mmaplist[i].size);
+            kh_dynablocks_t *blocks = mmaplist[i].dblist;
+            if(blocks) {
+                // The allocation side keys the hash and fills the helper table
+                // from the allocated address itself, so the cleanup must use
+                // `addr` too; using `sub` never matched the stored key and
+                // cleared a range shifted by sizeof(blockmark_t).
+                khint_t k = kh_get(dynablocks, blocks, addr);
+                if(k!=kh_end(blocks))
+                    kh_del(dynablocks, blocks, k);
+                for(int j=0; j<size; ++j)
+                    mmaplist[i].helper[addr-(uintptr_t)mmaplist[i].block+j] = 0;
+            }
+            return;
+        }
+    }
+    if(mmapsize)
+        dynarec_log(LOG_NONE, "Warning, block %p (size %d) not found in mmaplist for Free\n", (void*)addr, size);
+}
+
+// Map an address inside generated code back to the dynablock_t that owns it.
+// The arenas are searched first; an address outside all of them is looked up
+// in dblist_oversized.
+dynablock_t* FindDynablockFromNativeAddress(void* addr)
+{
+    const uintptr_t target = (uintptr_t)addr;
+    for(int n=0; n<mmapsize; ++n) {
+        const mmaplist_t* const arena = &mmaplist[n];
+        const uintptr_t base = (uintptr_t)arena->block;
+        if(target<base || target>=base+arena->size)
+            continue;
+        // without a helper table, defer to the generic list search
+        if(!arena->helper)
+            return FindDynablockDynablocklist(addr, arena->dblist);
+        // follow the back-offsets until a byte holding 0 is reached
+        uintptr_t off = target - base;
+        while(arena->helper[off])
+            off -= arena->helper[off];
+        const khint_t k = kh_get(dynablocks, arena->dblist, base + off);
+        return (k!=kh_end(arena->dblist)) ? kh_value(arena->dblist, k) : NULL;
+    }
+    // not inside any arena
+    return FindDynablockDynablocklist(addr, dblist_oversized);
+}
+
+// Reserve `size` bytes of executable memory for `db`.
+// A request that fits in an arena is served from mmaplist under mutex_mmap;
+// a larger one gets a dedicated mapping recorded in dblist_oversized.
+// Returns 0 on failure, including when the mutex is still busy after one
+// sched_yield() retry.
+uintptr_t AllocDynarecMap(dynablock_t* db, int size)
+{
+    if(size==0)
+        return 0;
+    if(size<=MMAPSIZE-2*sizeof(blockmark_t)) {
+        // regular request: never block, give up if the lock stays taken
+        if(pthread_mutex_trylock(&mutex_mmap)) {
+            sched_yield();
+            if(pthread_mutex_trylock(&mutex_mmap))
+                return 0;
+        }
+        uintptr_t addr = FindFreeDynarecMap(db, size);
+        if(addr==0)
+            addr = AddNewDynarecMap(db, size);
+        pthread_mutex_unlock(&mutex_mmap);
+        return addr;
+    }
+
+    // oversized request: dedicated mapping, tracked in its own hash
+    #ifndef USE_MMAP
+    void *p = NULL;
+    if(posix_memalign(&p, box64_pagesize, size)) {
+        dynarec_log(LOG_INFO, "Cannot create dynamic map of %d bytes\n", size);
+        return 0;
+    }
+    mprotect(p, size, PROT_READ | PROT_WRITE | PROT_EXEC);
+    #else
+    void* p = mmap(NULL, size, PROT_READ|PROT_WRITE|PROT_EXEC, MAP_ANONYMOUS|MAP_PRIVATE, -1, 0);
+    if(p==(void*)-1) {
+        dynarec_log(LOG_INFO, "Cannot create dynamic map of %d bytes\n", size);
+        return 0;
+    }
+    #endif
+    setProtection((uintptr_t)p, size, PROT_READ | PROT_WRITE | PROT_EXEC);
+    if(!dblist_oversized) {
+        dblist_oversized = kh_init(dynablocks);
+        kh_resize(dynablocks, dblist_oversized, 64);
+    }
+    int absent;
+    const khint_t slot = kh_put(dynablocks, dblist_oversized, (uintptr_t)p, &absent);
+    kh_value(dblist_oversized, slot) = db;
+    return (uintptr_t)p;
+}
+
+// Release memory obtained from AllocDynarecMap().
+// The same size threshold as the allocator decides whether `addr` lives in
+// an arena (freed under mutex_mmap) or is a dedicated oversized mapping.
+void FreeDynarecMap(dynablock_t* db, uintptr_t addr, uint32_t size)
+{
+    if(size<=MMAPSIZE-2*sizeof(blockmark_t)) {
+        pthread_mutex_lock(&mutex_mmap);
+        ActuallyFreeDynarecMap(db, addr, size);
+        pthread_mutex_unlock(&mutex_mmap);
+        return;
+    }
+    // oversized: release the mapping, then forget it in the hash
+    #ifndef USE_MMAP
+    free((void*)addr);
+    #else
+    munmap((void*)addr, size);
+    #endif
+    if(dblist_oversized) {
+        const khint_t k = kh_get(dynablocks, dblist_oversized, addr);
+        if(k!=kh_end(dblist_oversized))
+            kh_del(dynablocks, dblist_oversized, k);
+    }
+}
+
+// Look up the dynablocklist for `idx` (an address already shifted by the
+// caller) in the three-level dynmap123 table.
+// Returns NULL when any level on the way has not been allocated.
+dynablocklist_t* getDB(uintptr_t idx)
+{
+    const uintptr_t mask = (1<<DYNAMAP_SHIFT)-1;
+
+    dynablocklist_t*** const level2 = dynmap123[(idx>>32)&mask];
+    if(level2==NULL)
+        return NULL;
+    dynablocklist_t** const level1 = level2[(idx>>16)&mask];
+    if(level1==NULL)
+        return NULL;
+    return level1[idx&mask];
+}
 
 // each dynmap is 64k of size
 
-//void addDBFromAddressRange(uintptr_t addr, uintptr_t size)
-//{
-//    dynarec_log(LOG_DEBUG, "addDBFromAddressRange %p -> %p\n", (void*)addr, (void*)(addr+size-1));
-//    uintptr_t idx = (addr>>DYNAMAP_SHIFT);
-//    uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT);
-//    for (uintptr_t i=idx; i<=end; ++i) {
-//        if(!dynmap[i]) {
-//            dynmap[i] = NewDynablockList(i<<DYNAMAP_SHIFT, 1<<DYNAMAP_SHIFT, 0);
-//        }
-//    }
-//}
-
-//void cleanDBFromAddressRange(uintptr_t addr, uintptr_t size, int destroy)
-//{
-//    dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p -> %p %s\n", (void*)addr, (void*)(addr+size-1), destroy?"destroy":"mark");
-//    uintptr_t idx = (addr>>DYNAMAP_SHIFT);
-//    uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT);
-//    for (uintptr_t i=idx; i<=end; ++i) {
-//        dynablocklist_t* dblist = dynmap[i];
-//        if(dblist) {
-//            if(destroy)
-//                FreeRangeDynablock(dblist, addr, size);
-//            else
-//                MarkRangeDynablock(dblist, addr, size);
-//        }
-//    }
-//}
-
-#ifdef ARM
-//void arm_next(void);
+// Make sure a dynablocklist exists for every (1<<DYNAMAP_SHIFT)-sized slice
+// touched by [addr, addr+size-1], allocating the intermediate levels of
+// dynmap123 on demand.
+// If an intermediate table cannot be allocated the function logs a warning
+// and stops; slices already set up are kept.
+void addDBFromAddressRange(uintptr_t addr, uintptr_t size)
+{
+    dynarec_log(LOG_DEBUG, "addDBFromAddressRange %p -> %p\n", (void*)addr, (void*)(addr+size-1));
+    uintptr_t idx = (addr>>DYNAMAP_SHIFT);
+    uintptr_t end = ((addr+size-1)>>DYNAMAP_SHIFT);
+    for (uintptr_t i=idx; i<=end; ++i) {
+        int idx3 = (i>>32)&((1<<DYNAMAP_SHIFT)-1);
+        int idx2 = (i>>16)&((1<<DYNAMAP_SHIFT)-1);
+        int idx1 = (i    )&((1<<DYNAMAP_SHIFT)-1);
+        if(!dynmap123[idx3]) {
+            dynmap123[idx3] = (dynablocklist_t***)calloc(1<<DYNAMAP_SHIFT, sizeof(dynablocklist_t**));
+            // the next level is indexed right away: do not walk through NULL
+            if(!dynmap123[idx3]) {
+                dynarec_log(LOG_NONE, "Warning, cannot allocate dynablock map for %p\n", (void*)(i<<DYNAMAP_SHIFT));
+                return;
+            }
+        }
+        if(!dynmap123[idx3][idx2]) {
+            dynmap123[idx3][idx2] = (dynablocklist_t**)calloc(1<<DYNAMAP_SHIFT, sizeof(dynablocklist_t*));
+            if(!dynmap123[idx3][idx2]) {
+                dynarec_log(LOG_NONE, "Warning, cannot allocate dynablock map for %p\n", (void*)(i<<DYNAMAP_SHIFT));
+                return;
+            }
+        }
+        if(!dynmap123[idx3][idx2][idx1])
+            dynmap123[idx3][idx2][idx1] = NewDynablockList(i<<DYNAMAP_SHIFT, 1<<DYNAMAP_SHIFT, 0);
+    }
+}
+
+// Visit every existing dynablocklist covering [addr, addr+size-1] and either
+// free (destroy != 0) or mark (destroy == 0) the dynablocks in that range.
+// Slices without a list are skipped.
+void cleanDBFromAddressRange(uintptr_t addr, uintptr_t size, int destroy)
+{
+    dynarec_log(LOG_DEBUG, "cleanDBFromAddressRange %p -> %p %s\n", (void*)addr, (void*)(addr+size-1), destroy?"destroy":"mark");
+    const uintptr_t mask = (1<<DYNAMAP_SHIFT)-1;
+    const uintptr_t first = addr>>DYNAMAP_SHIFT;
+    const uintptr_t last = (addr+size-1)>>DYNAMAP_SHIFT;
+    for (uintptr_t slice=first; slice<=last; ++slice) {
+        dynablocklist_t*** const level2 = dynmap123[(slice>>32)&mask];
+        if(!level2)
+            continue;
+        dynablocklist_t** const level1 = level2[(slice>>16)&mask];
+        if(!level1)
+            continue;
+        dynablocklist_t* const dblist = level1[slice&mask];
+        if(!dblist)
+            continue;
+        if(destroy)
+            FreeRangeDynablock(dblist, addr, size);
+        else
+            MarkRangeDynablock(dblist, addr, size);
+    }
+}
+
+#ifdef ARM64
+void arm64_next(void);
 #endif
 
-//void addJumpTableIfDefault(void* addr, void* jmp)
-//{
-//    const uintptr_t idx = ((uintptr_t)addr>>JMPTABL_SHIFT);
-//    if(box64_jumptable[idx] == box64_jmptbl_default) {
-//        uintptr_t* tbl = (uintptr_t*)malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t));
-//        for(int i=0; i<(1<<JMPTABL_SHIFT); ++i)
-//            tbl[i] = (uintptr_t)arm_next;
-//        box64_jumptable[idx] = tbl;
-//    }
-//    const uintptr_t off = (uintptr_t)addr&((1<<JMPTABL_SHIFT)-1);
-//    if(box64_jumptable[idx][off]==(uintptr_t)arm_next)
-//        box64_jumptable[idx][off] = (uintptr_t)jmp;
-//}
-//void setJumpTableDefault(void* addr)
-//{
-//    const uintptr_t idx = ((uintptr_t)addr>>JMPTABL_SHIFT);
-//    if(box64_jumptable[idx] == box64_jmptbl_default) {
-//        return;
-//    }
-//    const uintptr_t off = (uintptr_t)addr&((1<<JMPTABL_SHIFT)-1);
-//    box64_jumptable[idx][off] = (uintptr_t)arm_next;
-//}
-//uintptr_t getJumpTable()
-//{
-//    return (uintptr_t)box64_jumptable;
-//}
-
-//uintptr_t getJumpTableAddress(uintptr_t addr)
-//{
-//    const uintptr_t idx = ((uintptr_t)addr>>JMPTABL_SHIFT);
-//    if(box64_jumptable[idx] == box64_jmptbl_default) {
-//        uintptr_t* tbl = (uintptr_t*)malloc((1<<JMPTABL_SHIFT)*sizeof(uintptr_t));
-//        for(int i=0; i<(1<<JMPTABL_SHIFT); ++i)
-//            tbl[i] = (uintptr_t)arm_next;
-//        box64_jumptable[idx] = tbl;
-//    }
-//    const uintptr_t off = (uintptr_t)addr&((1<<JMPTABL_SHIFT)-1);
-//    return (uintptr_t)&box64_jumptable[idx][off];
-//}
+// Record `jmp` as the jump target for x64 address `addr`, but only when the
+// slot still holds the default arm64_next entry.
+// Any level of the 4x16-bit table that is still a shared default table is
+// replaced by a private copy before the slot is touched.
+void addJumpTableIfDefault64(void* addr, void* jmp)
+{
+    const uintptr_t a = (uintptr_t)addr;
+    const uintptr_t i3 = (a>>48)&0xffff;
+    const uintptr_t i2 = (a>>32)&0xffff;
+    const uintptr_t i1 = (a>>16)&0xffff;
+    const uintptr_t i0 = a&0xffff;
+    const int entries = 1<<JMPTABL_SHIFT;
+
+    if(box64_jmptbl3[i3] == box64_jmptbldefault2) {
+        uintptr_t*** fresh2 = (uintptr_t***)malloc(entries*sizeof(uintptr_t**));
+        for(int n=0; n<entries; ++n)
+            fresh2[n] = box64_jmptbldefault1;
+        box64_jmptbl3[i3] = fresh2;
+    }
+    uintptr_t*** const level2 = box64_jmptbl3[i3];
+
+    if(level2[i2] == box64_jmptbldefault1) {
+        uintptr_t** fresh1 = (uintptr_t**)malloc(entries*sizeof(uintptr_t*));
+        for(int n=0; n<entries; ++n)
+            fresh1[n] = box64_jmptbldefault0;
+        level2[i2] = fresh1;
+    }
+    uintptr_t** const level1 = level2[i2];
+
+    if(level1[i1] == box64_jmptbldefault0) {
+        uintptr_t* fresh0 = (uintptr_t*)malloc(entries*sizeof(uintptr_t));
+        for(int n=0; n<entries; ++n)
+            fresh0[n] = (uintptr_t)arm64_next;
+        level1[i1] = fresh0;
+    }
+
+    // never overwrite a slot that already has a real target
+    uintptr_t* const slot = &level1[i1][i0];
+    if(*slot==(uintptr_t)arm64_next)
+        *slot = (uintptr_t)jmp;
+}
+// Reset the jump-table slot of `addr` to the default arm64_next entry.
+// Nothing is written when a level on the path is still a shared default
+// table, or when the slot already holds arm64_next.
+void setJumpTableDefault64(void* addr)
+{
+    const uintptr_t a = (uintptr_t)addr;
+    const uintptr_t i3 = (a>>48)&0xffff;
+    const uintptr_t i2 = (a>>32)&0xffff;
+    const uintptr_t i1 = (a>>16)&0xffff;
+    const uintptr_t i0 = a&0xffff;
+
+    // evaluated left to right, so a deeper level is only read once the
+    // level above it is known to be a private table
+    if(box64_jmptbl3[i3] == box64_jmptbldefault2
+     || box64_jmptbl3[i3][i2] == box64_jmptbldefault1
+     || box64_jmptbl3[i3][i2][i1] == box64_jmptbldefault0)
+        return;
+
+    uintptr_t* const slot = &box64_jmptbl3[i3][i2][i1][i0];
+    if(*slot!=(uintptr_t)arm64_next)
+        *slot = (uintptr_t)arm64_next;
+}
+// Address of the top level of the jump table, as an integer.
+uintptr_t getJumpTable64()
+{
+    uintptr_t**** const root = box64_jmptbl3;
+    return (uintptr_t)root;
+}
+
+// Return the address of the jump-table slot belonging to x64 address `addr`.
+// Any level on the path that is still a shared default table is first
+// replaced by a private copy, so the returned slot can be written to.
+uintptr_t getJumpTableAddress64(uintptr_t addr)
+{
+    const uintptr_t i3 = (addr>>48)&0xffff;
+    const uintptr_t i2 = (addr>>32)&0xffff;
+    const uintptr_t i1 = (addr>>16)&0xffff;
+    const uintptr_t i0 = addr&0xffff;
+    const int entries = 1<<JMPTABL_SHIFT;
+
+    if(box64_jmptbl3[i3] == box64_jmptbldefault2) {
+        uintptr_t*** fresh2 = (uintptr_t***)malloc(entries*sizeof(uintptr_t**));
+        for(int n=0; n<entries; ++n)
+            fresh2[n] = box64_jmptbldefault1;
+        box64_jmptbl3[i3] = fresh2;
+    }
+    uintptr_t*** const level2 = box64_jmptbl3[i3];
+
+    if(level2[i2] == box64_jmptbldefault1) {
+        uintptr_t** fresh1 = (uintptr_t**)malloc(entries*sizeof(uintptr_t*));
+        for(int n=0; n<entries; ++n)
+            fresh1[n] = box64_jmptbldefault0;
+        level2[i2] = fresh1;
+    }
+    uintptr_t** const level1 = level2[i2];
+
+    if(level1[i1] == box64_jmptbldefault0) {
+        uintptr_t* fresh0 = (uintptr_t*)malloc(entries*sizeof(uintptr_t));
+        for(int n=0; n<entries; ++n)
+            fresh0[n] = (uintptr_t)arm64_next;
+        level1[i1] = fresh0;
+    }
+
+    return (uintptr_t)&level1[i1][i0];
+}
 
 // Remove the Write flag from an address range, so DB can be executed
 // no log, as it can be executed inside a signal handler
-//void protectDB(uintptr_t addr, uintptr_t size)
-//{
-//    dynarec_log(LOG_DEBUG, "protectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1));
-//    uintptr_t idx = (addr>>MEMPROT_SHIFT);
-//    uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT);
-//    pthread_mutex_lock(&mutex_prot);
-//    for (uintptr_t i=idx; i<=end; ++i) {
-//        uint32_t prot = memprot[i];
-//        if(!prot)
-//            prot = PROT_READ | PROT_WRITE;    // comes from malloc & co, so should not be able to execute
-//        memprot[i] = prot|PROT_DYNAREC;
-//        if(!(prot&PROT_DYNAREC))
-//            mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE);
-//    }
-//    pthread_mutex_unlock(&mutex_prot);
-//}
-
-//void protectDBnolock(uintptr_t addr, uintptr_t size)
-//{
-//    dynarec_log(LOG_DEBUG, "protectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1));
-//    uintptr_t idx = (addr>>MEMPROT_SHIFT);
-//    uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT);
-//    for (uintptr_t i=idx; i<=end; ++i) {
-//        uint32_t prot = memprot[i];
-//        if(!prot)
-//            prot = PROT_READ | PROT_WRITE;    // comes from malloc & co, so should not be able to execute
-//        memprot[i] = prot|PROT_DYNAREC;
-//        if(!(prot&PROT_DYNAREC))
-//            mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE);
-//    }
-//}
-
-//void lockDB()
-//{
-//    pthread_mutex_lock(&mutex_prot);
-//}
-
-//void unlockDB()
-//{
-//    pthread_mutex_unlock(&mutex_prot);
-//}
+// Write-protect every page of [addr, addr+size-1] and flag it PROT_DYNAREC in
+// memprot, all under mutex_prot. A page with no recorded protection is
+// treated as PROT_READ|PROT_WRITE; a page already flagged is not mprotect'ed
+// again.
+// NOTE(review): calloc is not async-signal-safe, and dynarec_log/kh_put may
+// not be either - confirm before relying on this inside a signal handler.
+// NOTE(review): the key keeps bits 16 and up of the page number while the
+// index keeps its low 20 bits (MEMPROT_SIZE is 1<<20), so the two overlap;
+// confirm this matches the other memprot users.
+void protectDB(uintptr_t addr, uintptr_t size)
+{
+    dynarec_log(LOG_DEBUG, "protectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1));
+    const uintptr_t first = addr>>MEMPROT_SHIFT;
+    const uintptr_t last = (addr+size-1)>>MEMPROT_SHIFT;
+    pthread_mutex_lock(&mutex_prot);
+    for (uintptr_t page=first; page<=last; ++page) {
+        int absent;
+        const uint32_t key = (page>>16)&0xffffffff;
+        const khint_t k = kh_put(memprot, memprot, key, &absent);
+        // first page seen for this key: start from an all-zero table
+        if(absent)
+            kh_value(memprot, k) = (uint8_t*)calloc(1, MEMPROT_SIZE);
+        uint8_t* const slot = &kh_value(memprot, k)[page&(MEMPROT_SIZE-1)];
+        // comes from malloc & co when unset, so should not be able to execute
+        const uint8_t prot = (*slot) ? (*slot) : (PROT_READ | PROT_WRITE);
+        *slot = prot|PROT_DYNAREC;
+        if((prot&PROT_DYNAREC)==0)
+            mprotect((void*)(page<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE);
+    }
+    pthread_mutex_unlock(&mutex_prot);
+}
+
+// Same page update as protectDB(), but mutex_prot is not taken here; the
+// lockDB()/unlockDB() pair operates on that same mutex.
+// Every page of [addr, addr+size-1] gets PROT_DYNAREC recorded in memprot and
+// loses PROT_WRITE unless it was already flagged.
+void protectDBnolock(uintptr_t addr, uintptr_t size)
+{
+    dynarec_log(LOG_DEBUG, "protectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1));
+    const uintptr_t first = addr>>MEMPROT_SHIFT;
+    const uintptr_t last = (addr+size-1)>>MEMPROT_SHIFT;
+    for (uintptr_t page=first; page<=last; ++page) {
+        int absent;
+        const uint32_t key = (page>>16)&0xffffffff;
+        const khint_t k = kh_put(memprot, memprot, key, &absent);
+        // first page seen for this key: start from an all-zero table
+        if(absent)
+            kh_value(memprot, k) = (uint8_t*)calloc(1, MEMPROT_SIZE);
+        uint8_t* const slot = &kh_value(memprot, k)[page&(MEMPROT_SIZE-1)];
+        // comes from malloc & co when unset, so should not be able to execute
+        const uint8_t prot = (*slot) ? (*slot) : (PROT_READ | PROT_WRITE);
+        *slot = prot|PROT_DYNAREC;
+        if((prot&PROT_DYNAREC)==0)
+            mprotect((void*)(page<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_WRITE);
+    }
+}
+
+// Take mutex_prot, the mutex protectDB()/unprotectDB() hold while they update
+// memprot. Release it with unlockDB().
+void lockDB()
+{
+    pthread_mutex_lock(&mutex_prot);
+}
+
+// Release mutex_prot, the mutex taken by lockDB().
+void unlockDB()
+{
+    pthread_mutex_unlock(&mutex_prot);
+}
 
 // Add the Write flag to an address range, and mark all blocks as dirty
 // no log, as it can be executed inside a signal handler
-//void unprotectDB(uintptr_t addr, uintptr_t size)
-//{
-//    dynarec_log(LOG_DEBUG, "unprotectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1));
-//    uintptr_t idx = (addr>>MEMPROT_SHIFT);
-//    uintptr_t end = ((addr+size-1)>>MEMPROT_SHIFT);
-//    pthread_mutex_lock(&mutex_prot);
-//    for (uintptr_t i=idx; i<=end; ++i) {
-//        uint32_t prot = memprot[i];
-//        memprot[i] = prot&~PROT_DYNAREC;
-//        if(prot&PROT_DYNAREC) {
-//            mprotect((void*)(i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_DYNAREC);
-//            cleanDBFromAddressRange((i<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, 0);
-//        }
-//    }
-//    pthread_mutex_unlock(&mutex_prot);
-//}
+// Clear PROT_DYNAREC on every page of [addr, addr+size-1] under mutex_prot.
+// A page that carried the flag gets its recorded protection (minus the flag)
+// re-applied with mprotect, and cleanDBFromAddressRange() is called on it in
+// "mark" mode (destroy == 0).
+// NOTE(review): kh_put + calloc also create a table for pages that were
+// never recorded, where nothing else is then done - confirm this is intended.
+void unprotectDB(uintptr_t addr, uintptr_t size)
+{
+    dynarec_log(LOG_DEBUG, "unprotectDB %p -> %p\n", (void*)addr, (void*)(addr+size-1));
+    const uintptr_t first = addr>>MEMPROT_SHIFT;
+    const uintptr_t last = (addr+size-1)>>MEMPROT_SHIFT;
+    pthread_mutex_lock(&mutex_prot);
+    for (uintptr_t page=first; page<=last; ++page) {
+        int absent;
+        const uint32_t key = (page>>16)&0xffffffff;
+        const khint_t k = kh_put(memprot, memprot, key, &absent);
+        if(absent)
+            kh_value(memprot, k) = (uint8_t*)calloc(1, MEMPROT_SIZE);
+        uint8_t* const slot = &kh_value(memprot, k)[page&(MEMPROT_SIZE-1)];
+        const uint8_t prot = *slot;
+        *slot = prot&~PROT_DYNAREC;
+        if((prot&PROT_DYNAREC)==0)
+            continue;
+        mprotect((void*)(page<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, prot&~PROT_DYNAREC);
+        cleanDBFromAddressRange((page<<MEMPROT_SHIFT), 1<<MEMPROT_SHIFT, 0);
+    }
+    pthread_mutex_unlock(&mutex_prot);
+}
 
 #endif
 
@@ -701,11 +787,14 @@ void init_custommem_helper(box64context_t* ctx)
     pthread_mutex_init(&mutex_prot, NULL);
 #ifdef DYNAREC
     pthread_mutex_init(&mutex_mmap, NULL);
-#ifdef ARM
-//    for(int i=0; i<(1<<JMPTABL_SHIFT); ++i)
-//        box64_jmptbl_default[i] = (uintptr_t)arm_next;
-//    for(int i=0; i<JMPTABL_SIZE; ++i)
-//        box64_jumptable[i] = box64_jmptbl_default;
+#ifdef ARM64
+    if(box64_dynarec)
+        for(int i=0; i<(1<<JMPTABL_SHIFT); ++i) {
+            box64_jmptbldefault0[i] = (uintptr_t)arm64_next;
+            box64_jmptbldefault1[i] = box64_jmptbldefault0;
+            box64_jmptbldefault2[i] = box64_jmptbldefault1;
+            box64_jmptbl3[i] = box64_jmptbldefault2;
+        }
 #else
 #error Unsupported architecture!
 #endif
@@ -718,51 +807,57 @@ void fini_custommem_helper(box64context_t *ctx)
         return;
     inited = 0;
 #ifdef DYNAREC
-//    dynarec_log(LOG_DEBUG, "Free global Dynarecblocks\n");
-//    for (int i=0; i<mmapsize; ++i) {
-//        if(mmaplist[i].block)
-//            #ifdef USE_MMAP
-//            munmap(mmaplist[i].block, mmaplist[i].size);
-//            #else
-//            free(mmaplist[i].block);
-//            #endif
-//        if(mmaplist[i].dblist) {
-//            kh_destroy(dynablocks, mmaplist[i].dblist);
-//            mmaplist[i].dblist = NULL;
-//        }
-//        if(mmaplist[i].helper) {
-//            free(mmaplist[i].helper);
-//            mmaplist[i].helper = NULL;
-//        }
-//    }
-//    if(dblist_oversized) {
-//        kh_destroy(dynablocks, dblist_oversized);
-//        dblist_oversized = NULL;
-//    }
-//    mmapsize = 0;
-//    dynarec_log(LOG_DEBUG, "Free dynamic Dynarecblocks\n");
-//    uintptr_t idx = 0;
-//    uintptr_t end = ((0xFFFFFFFF)>>DYNAMAP_SHIFT);
-//    for (uintptr_t i=idx; i<=end; ++i) {
-//        dynablocklist_t* dblist = dynmap[i];
-//        if(dblist) {
-//            uintptr_t startdb = StartDynablockList(dblist);
-//            uintptr_t enddb = EndDynablockList(dblist);
-//            uintptr_t startaddr = 0;
-//            if(startaddr<startdb) startaddr = startdb;
-//            uintptr_t endaddr = 0xFFFFFFFF;
-//            if(endaddr>enddb) endaddr = enddb;
-//            FreeRangeDynablock(dblist, startaddr, endaddr-startaddr+1);
-//        }
-//    }
-//    for (uintptr_t i=idx; i<=end; ++i)
-//        if(dynmap[i])
-//            FreeDynablockList(&dynmap[i]);
-//    pthread_mutex_destroy(&mutex_mmap);
-//    free(mmaplist);
-//    for (int i=0; i<DYNAMAP_SIZE; ++i)
-//        if(box64_jumptable[i]!=box64_jmptbl_default)
-//            free(box64_jumptable[i]);
+    if(box64_dynarec) {
+        dynarec_log(LOG_DEBUG, "Free global Dynarecblocks\n");
+        for (int i=0; i<mmapsize; ++i) {
+            if(mmaplist[i].block)
+                #ifdef USE_MMAP
+                munmap(mmaplist[i].block, mmaplist[i].size);
+                #else
+                free(mmaplist[i].block);
+                #endif
+            if(mmaplist[i].dblist) {
+                kh_destroy(dynablocks, mmaplist[i].dblist);
+                mmaplist[i].dblist = NULL;
+            }
+            if(mmaplist[i].helper) {
+                free(mmaplist[i].helper);
+                mmaplist[i].helper = NULL;
+            }
+        }
+        if(dblist_oversized) {
+            kh_destroy(dynablocks, dblist_oversized);
+            dblist_oversized = NULL;
+        }
+        mmapsize = 0;
+        dynarec_log(LOG_DEBUG, "Free dynamic Dynarecblocks\n");
+        for (uintptr_t idx3=0; idx3<=0xffff; ++idx3)
+            if(dynmap123[idx3]) {
+                for (uintptr_t idx2=0; idx2<=0xffff; ++idx2)
+                    if(dynmap123[idx3][idx2]) {
+                        for (uintptr_t idx1=0; idx1<=0xffff; ++idx1)
+                            if(dynmap123[idx3][idx2][idx1])
+                                FreeDynablockList(&dynmap123[idx3][idx2][idx1]);
+                        free(dynmap123[idx3][idx2]);
+                    }
+                free(dynmap123[idx3]);
+            }
+
+        free(mmaplist);
+        pthread_mutex_destroy(&mutex_mmap);
+        for (int i3=0; i3<(1<<DYNAMAP_SHIFT); ++i3)
+            if(box64_jmptbl3[i3]!=box64_jmptbldefault2) {
+                for (int i2=0; i2<(1<<DYNAMAP_SHIFT); ++i2)
+                    if(box64_jmptbl3[i3][i2]!=box64_jmptbldefault1) {
+                        for (int i1=0; i1<(1<<DYNAMAP_SHIFT); ++i1)
+                            if(box64_jmptbl3[i3][i2][i1]!=box64_jmptbldefault0) {
+                                free(box64_jmptbl3[i3][i2][i1]);
+                            }
+                        free(box64_jmptbl3[i3][i2]);
+                    }
+                free(box64_jmptbl3[i3]);
+            }
+    }
 #endif
     uint8_t* m;
     kh_foreach_value(memprot, m,
diff --git a/src/dynarec/arm64_epilog.S b/src/dynarec/arm64_epilog.S
new file mode 100755
index 00000000..af39c1ba
--- /dev/null
+++ b/src/dynarec/arm64_epilog.S
@@ -0,0 +1,81 @@
+//arm64 epilog for dynarec
+//Write registers back to emu, restore the saved registers and return
+//called with pointer to emu as 1st parameter (x0)
+//(no 2nd parameter is read here, unlike the prolog)
+
+.text
+.align 4
+
+.global arm64_epilog
+// Leave dynarec code: write x10..x27 back to the 18 slots at the start of emu (x0),
+// reload the values arm64_prolog saved on the stack, restore lr/fp and return.
+arm64_epilog:
+    //update register -> emu
+    str     x10,  [x0, (8 *  0)]
+    str     x11,  [x0, (8 *  1)]
+    str     x12, [x0, (8 *  2)]
+    str     x13, [x0, (8 *  3)]
+    str     x14, [x0, (8 *  4)]
+    str     x15, [x0, (8 *  5)]
+    str     x16, [x0, (8 *  6)]
+    str     x17, [x0, (8 *  7)]
+    str     x18, [x0, (8 *  8)]
+    str     x19, [x0, (8 *  9)]
+    str     x20, [x0, (8 * 10)]
+    str     x21, [x0, (8 * 11)]
+    str     x22, [x0, (8 * 12)]
+    str     x23, [x0, (8 * 13)]
+    str     x24, [x0, (8 * 14)]
+    str     x25, [x0, (8 * 15)]
+    str     x26, [x0, (8 * 16)]
+    str     x27, [x0, (8 * 17)] // put back reg value in emu (NOTE(review): old comment said x25 is EIP - confirm which slot holds the instruction pointer)
+    //restore all used register
+    //vpop     {d8-d15}
+    ldr     x10, [sp, (8 *  0)]
+    ldr     x11, [sp, (8 *  1)]
+    ldr     x12, [sp, (8 *  2)]
+    ldr     x13, [sp, (8 *  3)]
+    ldr     x14, [sp, (8 *  4)]
+    ldr     x15, [sp, (8 *  5)]
+    ldr     x16, [sp, (8 *  6)]
+    ldr     x17, [sp, (8 *  7)]
+    ldr     x18, [sp, (8 *  8)]
+    ldr     x19, [sp, (8 *  9)]
+    ldr     x20, [sp, (8 * 10)]
+    ldr     x21, [sp, (8 * 11)]
+    ldr     x22, [sp, (8 * 12)]
+    ldr     x23, [sp, (8 * 13)]
+    ldr     x24, [sp, (8 * 14)]
+    ldr     x25, [sp, (8 * 15)]
+    ldr     x26, [sp, (8 * 16)]
+    ldr     x27, [sp, (8 * 17)]
+    add     sp,  sp, (8 * 18)
+    // exact mirror of the prolog's "stp lr, fp, [sp, 16]!": after the add, sp points at the
+    // saved pair, so load from [sp] and then step sp back by 16 to its value on entry.
+    // (the previous "[sp, 16]!" form read 16 bytes too high and left sp 32 bytes off)
+    ldp     lr, fp, [sp], -16  // saved lr
+    //end, return...
+    ret
+
+
+.global arm64_epilog_fast
+// Same as arm64_epilog but without writing the registers back to emu:
+// reload the values arm64_prolog saved on the stack, restore lr/fp and return.
+arm64_epilog_fast:
+    //restore all used register
+    //vpop     {d8-d15}
+    // the prolog saved x10..x27 in slots 0..17, so the same registers must be reloaded
+    // (the previous code reloaded x8..x25 from those slots)
+    ldr     x10, [sp, (8 *  0)]
+    ldr     x11, [sp, (8 *  1)]
+    ldr     x12, [sp, (8 *  2)]
+    ldr     x13, [sp, (8 *  3)]
+    ldr     x14, [sp, (8 *  4)]
+    ldr     x15, [sp, (8 *  5)]
+    ldr     x16, [sp, (8 *  6)]
+    ldr     x17, [sp, (8 *  7)]
+    ldr     x18, [sp, (8 *  8)]
+    ldr     x19, [sp, (8 *  9)]
+    ldr     x20, [sp, (8 * 10)]
+    ldr     x21, [sp, (8 * 11)]
+    ldr     x22, [sp, (8 * 12)]
+    ldr     x23, [sp, (8 * 13)]
+    ldr     x24, [sp, (8 * 14)]
+    ldr     x25, [sp, (8 * 15)]
+    ldr     x26, [sp, (8 * 16)]
+    ldr     x27, [sp, (8 * 17)]
+    add     sp,  sp, (8 * 18)
+    // exact mirror of the prolog's "stp lr, fp, [sp, 16]!": load the pair from [sp],
+    // then step sp back by 16 to its value on entry
+    ldp     lr, fp, [sp], -16  // saved lr
+    //end, return...
+    ret
diff --git a/src/dynarec/arm64_lock_helper.S b/src/dynarec/arm64_lock_helper.S
new file mode 100755
index 00000000..51b43316
--- /dev/null
+++ b/src/dynarec/arm64_lock_helper.S
@@ -0,0 +1,87 @@
+//arm64 lock helper
+//there are 2 parts: read and write
+// write returns 0 on success, 1 on failure (value has been changed)
+
+.text
+.align 4
+
+.global arm64_lock_read_b
+.global arm64_lock_write_b
+.global arm64_lock_read_h
+.global arm64_lock_write_h
+.global arm64_lock_read_d
+.global arm64_lock_write_d
+.global arm64_lock_read_dd
+.global arm64_lock_write_dd
+.global arm64_lock_xchg
+.global arm64_lock_storeifnull
+
+
+// Load-acquire exclusive of one byte; meant to be paired with arm64_lock_write_b.
+arm64_lock_read_b:
+    // address is x0, return is x0
+    ldaxrb  w0, [x0]
+    ret
+
+// Store-release exclusive of one byte; w0 receives the status written by stlxrb.
+arm64_lock_write_b:
+    // address is x0, value is x1, return is x0
+    // the address is moved to x2 because w0 is used as the status register
+    mov     x2, x0
+    stlxrb  w0, w1, [x2]
+    ret
+
+// Load-acquire exclusive of one halfword (16 bits); paired with arm64_lock_write_h.
+arm64_lock_read_h:
+    // address is x0, return is x0
+    ldaxrh  w0, [x0]
+    ret
+
+// Store-release exclusive of one halfword; w0 receives the status written by stlxrh.
+arm64_lock_write_h:
+    // address is x0, value is x1, return is x0
+    // the address is moved to x2 because w0 is used as the status register
+    mov     x2, x0
+    stlxrh  w0, w1, [x2]
+    ret
+
+// Read a 32-bit word.
+// NOTE(review): the ldaxr line is prefixed with '#' (apparently disabled) and a plain
+// ldr is executed instead, so this read is not an exclusive load - confirm this is intended.
+arm64_lock_read_d:
+    // address is x0, return is x0
+    #ldaxr   w0, [x0]
+    ldr     w0,[x0]
+    ret
+
+// Write a 32-bit word and always report success (0).
+// NOTE(review): the stlxr line is prefixed with '#' (apparently disabled) and a plain
+// str is executed instead, so the store can never fail - confirm this is intended.
+arm64_lock_write_d:
+    // address is x0, value is w1, return is x0
+    mov     x2, x0
+    #stlxr   w0, w1, [x2]
+    str     w1, [x2]
+    mov     w0, 0
+    ret
+
+// Load-acquire exclusive of one doubleword (64 bits); paired with arm64_lock_write_dd.
+arm64_lock_read_dd:
+    // address is x0, return is x0
+    ldaxr   x0, [x0]
+    ret
+
+// Store-release exclusive of one doubleword; w0 receives the status written by stlxr.
+arm64_lock_write_dd:
+    // address is x0, value is x1, return is x0
+    // the address is moved to x2 because w0 is used as the status register
+    mov     x2, x0
+    stlxr   w0, x1, [x2]
+    ret
+
+// Exchange the 64-bit value at [x0] with x1 and return the previous value.
+// The prototype is uintptr_t arm64_lock_xchg(void* p, uintptr_t val) and callers swap
+// pointers through it, so the full x registers are used (the w forms dropped the upper 32 bits).
+arm64_lock_xchg:
+    // address is x0, value is x1, return old value in x0
+    ldaxr   x2, [x0]
+    stlxr   w3, x1, [x0]
+    // w3 is non-zero when the exclusive store failed: retry from the load
+    cbnz    w3, arm64_lock_xchg
+    mov     x0, x2
+    ret
+
+// Store x1 to [x0] only when [x0] currently holds 0; returns the value now at [x0].
+arm64_lock_storeifnull:
+    // address is x0, value is x1, x1 store to x0 only if [x0] is 0. return new [x0] value (so x1 or old value)
+    ldaxr   x2, [x0]
+    cmp     x2, #0
+    // already non-null: return the existing value without storing
+    bne     arm64_lock_storeifnull_exit
+    mov     x2, x1
+    stlxr   w3, x2, [x0]
+    // status 1 means the exclusive store failed: start over from the load
+    cmp     w3, #1
+    beq     arm64_lock_storeifnull
+arm64_lock_storeifnull_exit:
+    mov     x0, x2
+    ret
diff --git a/src/dynarec/arm64_lock_helper.h b/src/dynarec/arm64_lock_helper.h
new file mode 100755
index 00000000..a6879bea
--- /dev/null
+++ b/src/dynarec/arm64_lock_helper.h
@@ -0,0 +1,31 @@
+#ifndef __ARM64_LOCK_HELPER__H__
+#define __ARM64_LOCK_HELPER__H__
+#include <stdint.h>
+
+// LDAXRB (load-acquire exclusive, 8 bits) of ADDR
+extern uint8_t arm64_lock_read_b(void* addr);
+// STLXRB of ADDR, return 0 if ok, 1 if not
+extern int arm64_lock_write_b(void* addr, uint8_t val);
+
+// LDAXRH (load-acquire exclusive, 16 bits) of ADDR
+extern uint16_t arm64_lock_read_h(void* addr);
+// STLXRH of ADDR, return 0 if ok, 1 if not
+extern int arm64_lock_write_h(void* addr, uint16_t val);
+
+// LDAXR (32 bits) of ADDR
+// NOTE(review): the assembly in this commit performs a plain LDR here - confirm
+extern uint32_t arm64_lock_read_d(void* addr);
+// STLXR (32 bits) of ADDR, return 0 if ok, 1 if not
+// NOTE(review): the assembly in this commit performs a plain STR and always returns 0 - confirm
+extern int arm64_lock_write_d(void* addr, uint32_t val);
+
+// LDAXR (load-acquire exclusive, 64 bits) of ADDR
+extern uint64_t arm64_lock_read_dd(void* addr);
+// STLXR (64 bits) of ADDR, return 0 if ok, 1 if not
+extern int arm64_lock_write_dd(void* addr, uint64_t val);
+
+// Atomically exchange value at [p] with val, return old [p] value
+extern uintptr_t arm64_lock_xchg(void* p, uintptr_t val);
+
+// Atomically store value to [p] only if [p] is NULL. Return new [p] value (so val or old)
+extern void* arm64_lock_storeifnull(void*p, void* val);
+
+#endif  //__ARM64_LOCK_HELPER__H__
\ No newline at end of file
diff --git a/src/dynarec/arm64_next.S b/src/dynarec/arm64_next.S
new file mode 100755
index 00000000..2410750c
--- /dev/null
+++ b/src/dynarec/arm64_next.S
@@ -0,0 +1,47 @@
+//arm64 update linker table for dynarec
+//called with pointer to emu as 1st parameter
+//and address of the table as 2nd parameter
+//ip is at r12
+
+.text
+.align 4
+
+.extern LinkNext
+
+.global arm64_next
+// Call LinkNext with the incoming x0/x1 (and whatever x2 holds) untouched, then branch to
+// the address it returns. x0, x1 and x10..x18 are preserved across the call.
+arm64_next:
+    // emu is x0
+    // don't put back reg value in emu, faster but more tricky to debug
+    // IP address is x1
+    // 11 slots are used; 12 are reserved so that sp stays 16-byte aligned,
+    // as required for sp-based accesses and for calling into C
+    sub     sp,  sp, (8 * 12)
+    str     x0,  [sp, (8 *  0)]
+    str     x1,  [sp, (8 *  1)]
+    str     x10, [sp, (8 *  2)]
+    str     x11, [sp, (8 *  3)]
+    str     x12, [sp, (8 *  4)]
+    str     x13, [sp, (8 *  5)]
+    str     x14, [sp, (8 *  6)]
+    str     x15, [sp, (8 *  7)]
+    str     x16, [sp, (8 *  8)]
+    str     x17, [sp, (8 *  9)]
+    str     x18, [sp, (8 * 10)]
+    // call the function
+    bl      LinkNext
+    // preserve return value
+    mov     x3, x0
+    // pop regs
+    ldr     x0,  [sp, (8 *  0)]
+    ldr     x1,  [sp, (8 *  1)]
+    ldr     x10, [sp, (8 *  2)]
+    ldr     x11, [sp, (8 *  3)]
+    ldr     x12, [sp, (8 *  4)]
+    ldr     x13, [sp, (8 *  5)]
+    ldr     x14, [sp, (8 *  6)]
+    ldr     x15, [sp, (8 *  7)]
+    ldr     x16, [sp, (8 *  8)]
+    ldr     x17, [sp, (8 *  9)]
+    ldr     x18, [sp, (8 * 10)]
+    add     sp,  sp, (8 * 12)
+    // return offset is jump address
+    br      x3
+
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
new file mode 100755
index 00000000..49539a1a
--- /dev/null
+++ b/src/dynarec/arm64_printer.c
@@ -0,0 +1,14 @@
+#include <stdint.h>

+#include <stddef.h>

+#include <string.h>

+#include <stdio.h>

+

+#include "arm64_printer.h"

+

+// Placeholder ARM64 opcode printer: formats the raw opcode as 8 hex digits followed by "???".
+// Returns a pointer to a static buffer that the next call overwrites.
+const char* arm64_print(uint32_t opcode)
+{
+    static char buff[200];
+
+    // "%08X" zero-pads; the previous "%8X" padded with spaces after the "0x" prefix
+    snprintf(buff, sizeof(buff), "0x%08X ???", (unsigned int)opcode);
+    return buff;
+}
\ No newline at end of file
diff --git a/src/dynarec/arm64_printer.h b/src/dynarec/arm64_printer.h
new file mode 100644
index 00000000..6fe21c33
--- /dev/null
+++ b/src/dynarec/arm64_printer.h
@@ -0,0 +1,6 @@
+#ifndef _ARM_PRINTER_H_
+#define _ARM_PRINTER_H_
+
+#include <stdint.h>  // uint32_t: keeps this header usable on its own
+
+// Return a printable form of one 32-bit ARM64 opcode.
+// The result points to a static buffer overwritten by the next call.
+const char* arm64_print(uint32_t opcode);
+
+#endif //_ARM_PRINTER_H_
diff --git a/src/dynarec/arm64_prolog.S b/src/dynarec/arm64_prolog.S
new file mode 100755
index 00000000..f480f2ea
--- /dev/null
+++ b/src/dynarec/arm64_prolog.S
@@ -0,0 +1,53 @@
+//arm prologue for dynarec
+//Save stuff, prepare stack and register
+//called with pointer to emu as 1st parameter
+//and address to jump to as 2nd parameter
+
+.text
+.align 4
+
+.global arm64_prolog
+// Enter dynarec code: save lr/fp and x10..x27 on the stack, load x10..x27 from the
+// 18 slots at the start of emu (x0), then branch to the address in x1.
+arm64_prolog:
+    //save all 18 used register
+    // NOTE(review): the pre-index is +16, so sp moves UP before the store: lr/fp land at
+    // entry_sp+16 and, after the sub below, the x26/x27 slots land at entry_sp+0/+8,
+    // i.e. in memory at or above the incoming sp. Confirm whether "[sp, -16]!" was
+    // intended (the epilogs would then need the matching change).
+    stp     lr, fp, [sp, 16]!  // save lr
+    sub     sp,  sp, (8 * 18)
+    str     x10, [sp, (8 *  0)]
+    str     x11, [sp, (8 *  1)]
+    str     x12, [sp, (8 *  2)]
+    str     x13, [sp, (8 *  3)]
+    str     x14, [sp, (8 *  4)]
+    str     x15, [sp, (8 *  5)]
+    str     x16, [sp, (8 *  6)]
+    str     x17, [sp, (8 *  7)]
+    str     x18, [sp, (8 *  8)]
+    str     x19, [sp, (8 *  9)]
+    str     x20, [sp, (8 * 10)]
+    str     x21, [sp, (8 * 11)]
+    str     x22, [sp, (8 * 12)]
+    str     x23, [sp, (8 * 13)]
+    str     x24, [sp, (8 * 14)]
+    str     x25, [sp, (8 * 15)]
+    str     x26, [sp, (8 * 16)]
+    str     x27, [sp, (8 * 17)]
+    //vpush     {d8-d15}    // save NEON regs?
+    //setup emu -> register
+    ldr     x10, [x0, (8 *  0)]
+    ldr     x11, [x0, (8 *  1)]
+    ldr     x12, [x0, (8 *  2)]
+    ldr     x13, [x0, (8 *  3)]
+    ldr     x14, [x0, (8 *  4)]
+    ldr     x15, [x0, (8 *  5)]
+    ldr     x16, [x0, (8 *  6)]
+    ldr     x17, [x0, (8 *  7)]
+    ldr     x18, [x0, (8 *  8)]
+    ldr     x19, [x0, (8 *  9)]
+    ldr     x20, [x0, (8 * 10)]
+    ldr     x21, [x0, (8 * 11)]
+    ldr     x22, [x0, (8 * 12)]
+    ldr     x23, [x0, (8 * 13)]
+    ldr     x24, [x0, (8 * 14)]
+    ldr     x25, [x0, (8 * 15)]
+    ldr     x26, [x0, (8 * 16)]
+    ldr     x27, [x0, (8 * 17)]
+    //jump to function
+    br       x1
diff --git a/src/dynarec/dynablock.c b/src/dynarec/dynablock.c
new file mode 100755
index 00000000..2ab39d09
--- /dev/null
+++ b/src/dynarec/dynablock.c
@@ -0,0 +1,422 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "tools/bridge_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynablock.h"
+#include "dynablock_private.h"
+#include "dynarec_private.h"
+#include "elfloader.h"
+#ifdef ARM64
+#include "dynarec_arm64.h"
+#include "arm64_lock_helper.h"
+#else
+#error Unsupported architecture!
+#endif
+#include "custommem.h"
+#include "khash.h"
+
+KHASH_MAP_INIT_INT(dynablocks, dynablock_t*)
+
+// X31 hash (h = h*31 + byte) of the len bytes starting at addr.
+// Returns 0 when len is 0. The arithmetic is done on uint32_t so that wrap-around is
+// well defined (the previous int32_t form overflowed a signed integer).
+uint32_t X31_hash_code(void* addr, int len)
+{
+    if(!len) return 0;
+    uint8_t* p = (uint8_t*)addr;
+    uint32_t h = *p;
+    for (--len, ++p; len; --len, ++p) h = (h << 5) - h + (uint32_t)*p;
+    return h;
+}
+
+// Allocate a dynablock list covering [text, text+textsz).
+//   direct: when non-zero, the direct-mapping array (one slot per byte of text) is allocated now
+// Returns NULL when textsz is 0 or the list itself cannot be allocated. A failure to
+// allocate the direct array is only reported; the list is still returned.
+dynablocklist_t* NewDynablockList(uintptr_t text, int textsz, int direct)
+{
+    if(!textsz) {
+        printf_log(LOG_NONE, "Error, creating a NULL sized Dynablock\n");
+        return NULL;
+    }
+    dynablocklist_t* ret = (dynablocklist_t*)calloc(1, sizeof(dynablocklist_t));
+    if(!ret) {
+        // previously dereferenced unconditionally
+        printf_log(LOG_NONE, "Error, cannot allocate Dynablocklist for %p\n", (void*)text);
+        return NULL;
+    }
+    ret->text = text;
+    ret->textsz = textsz;
+    if(direct) {    // textsz is known to be non-zero here
+        ret->direct = (dynablock_t**)calloc(textsz, sizeof(dynablock_t*));
+        if(!ret->direct) {printf_log(LOG_NONE, "Warning, fail to create direct block for dynablock @%p\n", (void*)text);}
+    }
+    dynarec_log(LOG_DEBUG, "New Dynablocklist %p, from %p->%p\n", ret, (void*)text, (void*)(text+textsz));
+    return ret;
+}
+
+// Destroy a dynablock: remove it from its parent's direct map and from the jump table,
+// free its sons recursively, then free its own storage. A NULL db is ignored.
+// A block already flagged "gone" is left alone so a deletion in progress is not repeated.
+void FreeDynablock(dynablock_t* db)
+{
+    if(db) {
+        dynarec_log(LOG_DEBUG, "FreeDynablock(%p), db->block=%p x64=%p:%p father=%p, with %d son(s) already gone=%d\n", db, db->block, db->x64_addr, db->x64_addr+db->x64_size, db->father, db->sons_size, db->gone);
+        if(db->gone)
+            return; // already in the process of deletion!
+        db->done = 0;
+        db->gone = 1;
+        // remove from direct if there
+        uintptr_t startdb = db->parent->text;
+        uintptr_t enddb = db->parent->text + db->parent->textsz;
+        if(db->parent->direct) {
+            uintptr_t addr = (uintptr_t)db->x64_addr;
+            if(addr>=startdb && addr<enddb)
+                db->parent->direct[addr-startdb] = NULL;
+        }
+        // remove jumptable
+        setJumpTableDefault64(db->x64_addr);
+        // remove and free the sons
+        for (int i=0; i<db->sons_size; ++i) {
+            // take each son out of the array atomically before freeing it
+            dynablock_t *son = (dynablock_t*)arm64_lock_xchg(&db->sons[i], 0);
+            FreeDynablock(son);
+        }
+        // only the father free the DynarecMap
+        if(!db->father) {
+            dynarec_log(LOG_DEBUG, " -- FreeDyrecMap(%p, %d)\n", db->block, db->size);
+            FreeDynarecMap(db, (uintptr_t)db->block, db->size);
+        }
+        free(db->sons);
+        free(db->instsize);
+        free(db);
+    }
+}
+
+// Free a whole dynablock list and reset the caller's pointer to NULL.
+// Every father block found in the direct map is freed (sons go with their father),
+// then the direct array and the list itself. NULL / pointer-to-NULL are ignored.
+void FreeDynablockList(dynablocklist_t** dynablocks)
+{
+    if(!dynablocks || !*dynablocks)
+        return;
+    dynablocklist_t* list = *dynablocks;
+    dynarec_log(LOG_DEBUG, "Free Dynablocklist %p, with Direct Blocks %p\n", list, list->direct);
+    if(list->direct) {
+        for (int i=0; i<list->textsz; ++i) {
+            dynablock_t* db = list->direct[i];
+            if(db && !db->father)
+                FreeDynablock(db);
+        }
+        free(list->direct);
+        list->direct = NULL;
+    }
+
+    free(list);
+    *dynablocks = NULL;
+}
+
+// Flag a block (always its father) as needing a hash re-test, and reset the jump-table
+// entries of the father and of each son to the default. Does nothing for NULL or for a
+// father that is already flagged.
+void MarkDynablock(dynablock_t* db)
+{
+    if(db) {
+        if(db->father)
+            db = db->father;    // mark only father
+        if(db->need_test)
+            return; // already done
+        db->need_test = 1;  // test only blocks that can be marked (and so deleted)
+        setJumpTableDefault64(db->x64_addr);
+        for(int i=0; i<db->sons_size; ++i) {
+            // FreeDynablock swaps sons[] entries to NULL while tearing a father down
+            dynablock_t* son = db->sons[i];
+            if(son)
+                setJumpTableDefault64(son->x64_addr);
+        }
+    }
+}
+
+// First address covered by the list, or 0 when db is NULL.
+uintptr_t StartDynablockList(dynablocklist_t* db)
+{
+    return db ? db->text : 0;
+}
+// Last address covered by the list (inclusive), or 0 when db is NULL.
+uintptr_t EndDynablockList(dynablocklist_t* db)
+{
+    return db ? (db->text+db->textsz-1) : 0;
+}
+
+// Return 1 when the closed intervals [start1, end1] and [start2, end2] overlap, 0 otherwise.
+int IntervalIntersects(uintptr_t start1, uintptr_t end1, uintptr_t start2, uintptr_t end2)
+{
+    return (start1 <= end2 && start2 <= end1) ? 1 : 0;
+}
+
+// Mark (see MarkDynablock) every block registered in the direct map of `dynablocks`
+// at an address inside [addr, addr+size-1], clipped to the list's own range.
+// Does nothing when the list or its direct map is NULL.
+void MarkDirectDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size)
+{
+    if(!dynablocks)
+        return;
+    if(!dynablocks->direct)
+        return;
+    uintptr_t startdb = dynablocks->text;
+    uintptr_t enddb = startdb + dynablocks->textsz -1;
+    uintptr_t start = addr;
+    uintptr_t end = addr+size-1;
+    if(start<startdb)
+        start = startdb;
+    if(end>enddb)
+        end = enddb;
+    dynablock_t *db;
+    // start/end/enddb are all inclusive bounds, so the comparisons and the loop must
+    // include them: the previous "i<end" skipped a block starting on the last byte
+    if(end>=startdb && start<=enddb)
+        for(uintptr_t i = start; i<=end; ++i)
+            if((db=dynablocks->direct[i-startdb]))
+                // the "+1" slack on the range end is kept from the original (over-inclusive)
+                if(IntervalIntersects((uintptr_t)db->x64_addr, (uintptr_t)db->x64_addr+db->x64_size-1, addr, addr+size+1))
+                    MarkDynablock(db);
+}
+
+// Free every father block of `dynablocks` that has a direct-map entry in [addr, addr+size).
+// Entries are first detached from the direct map and their fathers collected in a
+// temporary hash (so a father reached through several entries is freed once), then freed.
+void FreeRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size)
+{
+    if(!dynablocks)
+        return;
+
+    if(dynablocks->direct) {
+        dynablock_t* db;
+        int ret;
+        khint_t k;
+        kh_dynablocks_t *blocks = kh_init(dynablocks);
+        // copy in a temporary list
+        if(dynablocks->direct) {
+            uintptr_t startdb = dynablocks->text;
+            uintptr_t enddb = startdb + dynablocks->textsz;
+            uintptr_t start = addr;
+            uintptr_t end = addr+size;
+            if(start<startdb)
+                start = startdb;
+            if(end>enddb)
+                end = enddb;
+            if(end>startdb && start<enddb)
+                for(uintptr_t i = start; i<end; ++i) {
+                    // detach the entry atomically; 0 means the slot was already empty
+                    db = (dynablock_t*)arm64_lock_xchg(&dynablocks->direct[i-startdb], 0);
+                    if(db) {
+                        if(db->father)
+                            db = db->father;
+                        if(db->parent==dynablocks) {
+                            // NOTE(review): the hash is declared with KHASH_MAP_INIT_INT (32-bit keys)
+                            // while the key is a 64-bit pointer value; two blocks whose addresses share
+                            // their low 32 bits would map to one entry - confirm / consider INT64 keys
+                            k = kh_put(dynablocks, blocks, (uintptr_t)db, &ret);
+                            kh_value(blocks, k) = db;
+                        }
+                    }
+                }
+        }
+        // purge the list
+        kh_foreach_value(blocks, db,
+            FreeDynablock(db);
+        );
+        kh_destroy(dynablocks, blocks);
+    }
+}
+// Mark every block that may overlap [addr, addr+size). The range is widened downward by
+// the list's maxsz so that blocks starting before addr but reaching into it are found,
+// and the lists of the preceding map slots are scanned as well.
+void MarkRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size)
+{
+    if(!dynablocks)
+        return;
+    if(dynablocks->direct) {
+        const uintptr_t maxsz = (uintptr_t)dynablocks->maxsz;
+        // clamp at 0 instead of wrapping around when addr is smaller than maxsz
+        uintptr_t new_addr = (addr>maxsz)?(addr-maxsz):0;
+        uintptr_t new_size = size + (addr-new_addr);
+        MarkDirectDynablock(dynablocks, new_addr, new_size);
+        // the blocks check before
+        // idx is uintptr_t: an int would truncate map indices of 64-bit addresses
+        for(uintptr_t idx=new_addr>>DYNAMAP_SHIFT; idx<(addr>>DYNAMAP_SHIFT); ++idx)
+            MarkDirectDynablock(getDB(idx), new_addr, new_size);
+    }
+}
+
+// Search the hash `dynablocks` for the block whose [block, block+size) range contains addr.
+// Returns the father of the match when it has one, the match itself otherwise,
+// or NULL when nothing matches or the hash is NULL.
+dynablock_t* FindDynablockDynablocklist(void* addr, kh_dynablocks_t* dynablocks)
+{
+    if(!dynablocks)
+        return NULL;
+    dynablock_t* db;
+    kh_foreach_value(dynablocks, db, 
+        const uintptr_t s = (uintptr_t)db->block;
+        const uintptr_t e = (uintptr_t)db->block+db->size;
+        if((uintptr_t)addr>=s && (uintptr_t)addr<e)
+            return db->father?db->father:db;
+    )
+    return NULL;
+}
+
+// Look up the dynablock list registered for the map slot that contains addr.
+static dynablocklist_t* getDBFromAddress(uintptr_t addr)
+{
+    return getDB(addr>>DYNAMAP_SHIFT);
+}
+
+// Return the block registered at addr in the direct map, creating an empty one if asked.
+//   dynablocks: list expected to cover addr (when it does not, the list is looked up again from addr)
+//   created:    in: non-zero to allow creation; out: 1 when a new block was inserted, 0 otherwise
+// Returns the existing or new block, or NULL when none exists and creation was not
+// requested, the list is NULL, or memory could not be allocated.
+dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* created)
+{
+    if(!dynablocks) {
+        dynarec_log(LOG_INFO, "Warning: Ask to create a dynablock with a NULL dynablocklist (addr=%p)\n", (void*)addr);
+        *created = 0;
+        return NULL;
+    }
+    if((addr<dynablocks->text) || (addr>=(dynablocks->text+dynablocks->textsz))) {
+        // this should be useless
+        //dynarec_log(LOG_INFO, "Warning: Refused to create a Direct Block that is out-of-bound: dynablocks=%p (%p:%p), addr=%p\n", dynablocks, (void*)(dynablocks->text), (void*)(dynablocks->text+dynablocks->textsz), (void*)addr);
+        //*created = 0;
+        //return NULL;
+        return AddNewDynablock(getDBFromAddress(addr), addr, created);
+    }
+    dynablock_t* block = NULL;
+    // first, check if it exist in direct access mode
+    if(dynablocks->direct) {
+        block = dynablocks->direct[addr-dynablocks->text];
+        if(block) {
+            dynarec_log(LOG_DUMP, "Block already exist in Direct Map\n");
+            *created = 0;
+            return block;
+        }
+    }
+    
+    if (!*created)
+        return block;
+    
+    if(!dynablocks->direct) {
+        dynablock_t** p = (dynablock_t**)calloc(dynablocks->textsz, sizeof(dynablock_t*));
+        if(!p) {
+            // without a direct array there is nowhere to register the block
+            *created = 0;
+            return NULL;
+        }
+        if(arm64_lock_storeifnull(&dynablocks->direct, p)!=p)
+            free(p);    // someone already create the direct array, too late...
+    }
+
+    // create and add new block
+    dynarec_log(LOG_DUMP, "Ask for DynaRec Block creation @%p\n", (void*)addr);
+
+    block = (dynablock_t*)calloc(1, sizeof(dynablock_t));
+    if(!block) {
+        // do not report a creation the caller would then dereference
+        *created = 0;
+        return NULL;
+    }
+    block->parent = dynablocks; 
+    dynablock_t* tmp = (dynablock_t*)arm64_lock_storeifnull(&dynablocks->direct[addr-dynablocks->text], block);
+    if(tmp !=  block) {
+        // a block appeared!
+        free(block);
+        *created = 0;
+        return tmp;
+    }
+
+    *created = 1;
+    return block;
+}
+
+/* 
+    Find the block registered at addr, creating it when create is non-zero.
+      emu:      emulator state, used to reach the context when no list is mapped for addr
+      filladdr: address the block would be filled from (unused while FillBlock64 is disabled)
+      current:  optional block whose parent list is tried first
+    return NULL if block is not found / cannot be created. 
+    Don't create if create==0
+*/
+static dynablock_t* internalDBGetBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr, int create, dynablock_t* current)
+{
+    // try the quickest way first: get parent of current and check if ok!
+    dynablocklist_t *dynablocks = NULL;
+    dynablock_t* block = NULL;
+    if(current) {
+        dynablocks = current->parent;
+        if(dynablocks && !(addr>=dynablocks->text && addr<(dynablocks->text+dynablocks->textsz)))
+            dynablocks = NULL;
+    }
+    // nope, lets do the long way
+    if(!dynablocks) {
+        dynablocks = getDBFromAddress(addr);
+        if(!dynablocks) {
+            dynablocks = GetDynablocksFromAddress(emu->context, addr);
+            if(!dynablocks)
+                return NULL;
+        }
+    }
+    // check direct first, without lock
+    if(dynablocks->direct/* && (addr>=dynablocks->text) && (addr<(dynablocks->text+dynablocks->textsz))*/)
+        if((block = dynablocks->direct[addr-dynablocks->text]))
+            return block;
+
+    int created = create;
+    block = AddNewDynablock(dynablocks, addr, &created);
+    if(!created || !block)
+        return block;   // existing block (or nothing could be created)...
+
+    if(box64_dynarec_dump)
+        pthread_mutex_lock(&my_context->mutex_dyndump);
+    // fill the block
+    block->x64_addr = (void*)addr;
+    if(0/*!FillBlock64(block, filladdr)*/) {
+        void* old = (void*)arm64_lock_xchg(&dynablocks->direct[addr-dynablocks->text], 0);
+        if(old!=block && old) {// put it back in place, strange things are happening here!
+            dynarec_log(LOG_INFO, "Warning, a wild block appeared at %p: %p\n", (void*)addr, old);
+            arm64_lock_xchg(&dynablocks->direct[addr-dynablocks->text], (uintptr_t)old);
+        }
+        free(block);
+        block = NULL;
+    }
+    if(box64_dynarec_dump)
+        pthread_mutex_unlock(&my_context->mutex_dyndump);
+    // check size
+    if(block && block->x64_size) {
+        int blocksz = block->x64_size;
+        if(dynablocks->maxsz<blocksz) {
+            dynablocks->maxsz = blocksz;
+            // propagate the new maximum to the following map slots the block reaches into
+            // (idx is uintptr_t: an int would truncate map indices of 64-bit addresses)
+            for(uintptr_t idx=(addr>>DYNAMAP_SHIFT)+1; idx<=((addr+blocksz)>>DYNAMAP_SHIFT); ++idx) {
+                dynablocklist_t* dblist;
+                if((dblist = getDB(idx)))
+                    if(dblist->maxsz<blocksz)
+                        dblist->maxsz = blocksz;
+            }
+        }
+        lockDB();
+        protectDBnolock((uintptr_t)block->x64_addr, block->x64_size);
+        // fill-in jumptable
+        addJumpTableIfDefault64(block->x64_addr, block->block);
+        for(int i=0; i<block->sons_size; ++i)
+            addJumpTableIfDefault64(block->sons[i]->x64_addr, block->sons[i]->block);
+        unlockDB();
+    }
+
+    dynarec_log(LOG_DEBUG, " --- DynaRec Block %s @%p:%p (%p, 0x%x bytes, with %d son(s))\n", created?"created":"recycled", (void*)addr, (void*)(addr+((block)?block->x64_size:0)), (block)?block->block:0, (block)?block->size:0, (block)?block->sons_size:0);
+
+    return block;
+}
+
+// Get (and optionally create) the block for addr, re-validating it when it is flagged need_test.
+//   current: in/out hint, the block being left; reset to NULL when that block is freed here
+// A flagged block has its x64 bytes re-hashed: on mismatch the father is freed and the
+// lookup restarted; on match the flag is cleared, the range protected again and the
+// jump table refilled.
+dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, dynablock_t** current)
+{
+    dynablock_t *db = internalDBGetBlock(emu, addr, addr, create, *current);
+    if(db && db->done && db->block && (db->need_test || (db->father && db->father->need_test))) {
+        dynablock_t *father = db->father?db->father:db;
+        uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size);
+        if(hash!=father->hash) {
+            dynarec_log(LOG_DEBUG, "Invalidating block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size, hash, father->hash, father->sons_size, (void*)addr);
+            // no more current if it gets invalidated too: FreeDynablock(father) releases the
+            // father and all of its sons, so current must be dropped when it is one of them
+            // (the previous address test could never be true and left current dangling)
+            if(*current && ((*current)==father || (*current)->father==father))
+                *current = NULL;
+            // Free father, it's now invalid!
+            FreeDynablock(father);
+            // start again... (will create a new block)
+            db = internalDBGetBlock(emu, addr, addr, create, *current);
+        } else {
+            father->need_test = 0;
+            lockDB();
+            protectDBnolock((uintptr_t)father->x64_addr, father->x64_size);
+            // fill back jumptable
+            addJumpTableIfDefault64(father->x64_addr, father->block);
+            for(int i=0; i<father->sons_size; ++i)
+                addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block);
+            unlockDB();
+        }
+    } 
+    return db;
+}
+
+// Get or create the block registered at addr, with filladdr passed on as the fill address.
+// Creation is always allowed and no "current" hint is used. A block flagged need_test is
+// re-validated the same way DBGetBlock does (hash check, then free+retry or re-protect).
+dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr)
+{
+    dynarec_log(LOG_DEBUG, "Creating AlternateBlock at %p for %p\n", (void*)addr, (void*)filladdr);
+    int create = 1;
+    dynablock_t *db = internalDBGetBlock(emu, addr, filladdr, create, NULL);
+    if(db && db->done && db->block && (db->need_test || (db->father && db->father->need_test))) {
+        dynablock_t *father = db->father?db->father:db;
+        // compare the current x64 bytes with the hash stored in the block
+        uint32_t hash = X31_hash_code(father->x64_addr, father->x64_size);
+        if(hash!=father->hash) {
+            dynarec_log(LOG_DEBUG, "Invalidating alt block %p from %p:%p (hash:%X/%X) with %d son(s) for %p\n", father, father->x64_addr, father->x64_addr+father->x64_size, hash, father->hash, father->sons_size, (void*)addr);
+            // Free father, it's now invalid!
+            FreeDynablock(father);
+            // start again... (will create a new block)
+            db = internalDBGetBlock(emu, addr, filladdr, create, NULL);
+        } else {
+            father->need_test = 0;
+            lockDB();
+            protectDBnolock((uintptr_t)father->x64_addr, father->x64_size);
+            // fill back jumptable
+            addJumpTableIfDefault64(father->x64_addr, father->block);
+            for(int i=0; i<father->sons_size; ++i)
+                addJumpTableIfDefault64(father->sons[i]->x64_addr, father->sons[i]->block);
+            unlockDB();
+        }
+    } 
+    return db;
+}
diff --git a/src/dynarec/dynablock_private.h b/src/dynarec/dynablock_private.h
new file mode 100755
index 00000000..dd2ee4c0
--- /dev/null
+++ b/src/dynarec/dynablock_private.h
@@ -0,0 +1,36 @@
+#ifndef __DYNABLOCK_PRIVATE_H_
+#define __DYNABLOCK_PRIVATE_H_
+
+typedef struct dynablocklist_s  dynablocklist_t;
+
+// Pair of 4-bit size fields packed together.
+// NOTE(review): presumably the size of one x64 instruction and of its native
+// translation - the unit is not visible here, confirm.
+typedef struct instsize_s {
+    unsigned int x64:4;
+    unsigned int nat:4;
+} instsize_t;
+
+// One dynarec block: a range of x64 code and its associated native block.
+typedef struct dynablock_s {
+    dynablocklist_t* parent;    // list this block is registered in
+    void*           block;      // start of the block's own memory area (presumably the generated native code - confirm)
+    int             size;       // size in bytes of the area at `block`
+    void*           x64_addr;   // address of the x64 code this block stands for
+    uintptr_t       x64_size;   // length in bytes of that x64 code
+    uint32_t        hash;       // X31 hash of the x64 bytes, compared again when need_test is set
+    uint8_t         need_test;  // set when the block was marked; the hash must be re-checked before use
+    uint8_t         done;       // cleared when the block is freed (presumably set once the block is fully built - confirm)
+    uint8_t         gone;       // set once deletion of the block has started
+    uint8_t         dummy;
+    int             isize;      // NOTE(review): presumably the number of entries in instsize - confirm
+    dynablock_t**   sons;   // sons (kind-of dummy dynablock...)
+    int             sons_size;  // number of entries in sons
+    dynablock_t*    father; // set only in the case of a son
+    instsize_t*     instsize;   // heap array, freed together with the block
+} dynablock_t;
+
+// Set of dynablocks covering the x64 address range [text, text+textsz).
+typedef struct dynablocklist_s {
+    uintptr_t           text;      // first x64 address covered
+    int                 textsz;    // number of bytes covered
+    int                 maxsz;     // maxblock size (for this block or previous block)
+    dynablock_t**       direct;    // direct mapping (waste of space, so the array is created at first write)
+} dynablocklist_t;
+
+#endif //__DYNABLOCK_PRIVATE_H_
\ No newline at end of file
diff --git a/src/dynarec/dynarec.c b/src/dynarec/dynarec.c
index c88cd61f..8769bc1e 100755
--- a/src/dynarec/dynarec.c
+++ b/src/dynarec/dynarec.c
@@ -22,10 +22,10 @@
 #endif
 
 #ifdef DYNAREC
-#ifdef ARM
-void arm_prolog(x64emu_t* emu, void* addr) EXPORTDYN;
-void arm_epilog() EXPORTDYN;
-void arm_epilog_fast() EXPORTDYN;
+#ifdef ARM64
+void arm64_prolog(x64emu_t* emu, void* addr) EXPORTDYN;
+void arm64_epilog() EXPORTDYN;
+void arm64_epilog_fast() EXPORTDYN;
 #endif
 #endif
 
@@ -39,7 +39,7 @@ void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2)
     if(!addr) {
         x2-=8;  // actual PC is 2 instructions ahead
         dynablock_t* db = FindDynablockFromNativeAddress(x2);
-        printf_log(LOG_NONE, "Warning, jumping to NULL address from %p (db=%p, x86addr=%p)\n", x2, db, db?(void*)getX86Address(db, (uintptr_t)x2):NULL);
+        printf_log(LOG_NONE, "Warning, jumping to NULL address from %p (db=%p, x64addr=%p)\n", x2, db, db?(void*)getX64Address(db, (uintptr_t)x2):NULL);
     }
     #endif
     dynablock_t* current = NULL;
@@ -47,17 +47,17 @@ void* LinkNext(x64emu_t* emu, uintptr_t addr, void* x2)
     dynablock_t* block = DBGetBlock(emu, addr, 1, &current);
     if(!block) {
         // no block, let link table as is...
-        //tableupdate(arm_epilog, addr, table);
-        return arm_epilog;
+        //tableupdate(arm64_epilog, addr, table);
+        return arm64_epilog;
     }
     if(!block->done) {
         // not finished yet... leave linker
         //tableupdate(arm_linker, addr, table);
-        return arm_epilog;
+        return arm64_epilog;
     }
     if(!(jblock=block->block)) {
         // null block, but done: go to epilog, no linker here
-        return arm_epilog;
+        return arm64_epilog;
     }
     //dynablock_t *father = block->father?block->father:block;
     return jblock;
@@ -82,7 +82,7 @@ void DynaCall(x64emu_t* emu, uintptr_t addr)
         }
     }
 #ifdef DYNAREC
-    if(!box86_dynarec)
+    if(!box64_dynarec)
 #endif
         EmuCall(emu, addr);
 #ifdef DYNAREC
@@ -107,18 +107,18 @@ void DynaCall(x64emu_t* emu, uintptr_t addr)
                 dynarec_log(LOG_DEBUG, "%04d|Calling Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu);
                 Run(emu, 1);
             } else {
-                dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x86 instructions (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,block->father, emu);
+                dynarec_log(LOG_DEBUG, "%04d|Calling DynaRec Block @%p (%p) of %d x64 instructions (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize ,block->father, emu);
                 CHECK_FLAGS(emu);
                 // block is here, let's run it!
-                #ifdef ARM
-                arm_prolog(emu, block->block);
+                #ifdef ARM64
+                arm64_prolog(emu, block->block);
                 #endif
             }
             if(emu->fork) {
                 int forktype = emu->fork;
                 emu->quit = 0;
                 emu->fork = 0;
-                emu = x86emu_fork(emu, forktype);
+                emu = x64emu_fork(emu, forktype);
                 if(emu->type == EMUTYPE_MAIN) {
                     ejb = GetJmpBuf();
                     ejb->emu = emu;
@@ -170,7 +170,7 @@ int DynaRun(x64emu_t* emu)
         }
     }
 #ifdef DYNAREC
-    if(!box86_dynarec)
+    if(!box64_dynarec)
 #endif
         return Run(emu, 0);
 #ifdef DYNAREC
@@ -186,17 +186,17 @@ int DynaRun(x64emu_t* emu)
                 dynarec_log(LOG_DEBUG, "%04d|Running Interpretor @%p, emu=%p\n", GetTID(), (void*)R_RIP, emu);
                 Run(emu, 1);
             } else {
-                dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x86 insts (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, block->father, emu);
+                dynarec_log(LOG_DEBUG, "%04d|Running DynaRec Block @%p (%p) of %d x64 insts (father=%p) emu=%p\n", GetTID(), (void*)R_RIP, block->block, block->isize, block->father, emu);
                 // block is here, let's run it!
-                #ifdef ARM
-                arm_prolog(emu, block->block);
+                #ifdef ARM64
+                arm64_prolog(emu, block->block);
                 #endif
             }
             if(emu->fork) {
                 int forktype = emu->fork;
                 emu->quit = 0;
                 emu->fork = 0;
-                emu = x86emu_fork(emu, forktype);
+                emu = x64emu_fork(emu, forktype);
                 if(emu->type == EMUTYPE_MAIN) {
                     ejb = GetJmpBuf();
                     ejb->emu = emu;
diff --git a/src/dynarec/dynarec_arm64.c b/src/dynarec/dynarec_arm64.c
new file mode 100755
index 00000000..b3b0b06c
--- /dev/null
+++ b/src/dynarec/dynarec_arm64.c
@@ -0,0 +1,449 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <errno.h>
+#include <string.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "custommem.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "tools/bridge_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynablock.h"
+#include "dynablock_private.h"
+#include "dynarec_arm64.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+#include "elfloader.h"
+
+void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name) {    // Log one x64 instruction: decoded text when dec is set, else raw bytes followed by name
+    uint8_t *ip = (uint8_t*)inst->addr;
+    if(ip[0]==0xcc && ip[1]=='S' && ip[2]=='C') {    // 0xCC 'S' 'C' marker: special sequence, logged as exit / native call
+        uintptr_t a = *(uintptr_t*)(ip+3);    // pointer-sized value stored right after the 3-byte marker
+        if(a==0) {
+            dynarec_log(LOG_NONE, "%s%p: Exit x64emu%s\n", (box64_dynarec_dump>1)?"\e[1m":"", (void*)ip, (box64_dynarec_dump>1)?"\e[m":"");
+        } else {
+            dynarec_log(LOG_NONE, "%s%p: Native call to %p%s\n", (box64_dynarec_dump>1)?"\e[1m":"", (void*)ip, (void*)a, (box64_dynarec_dump>1)?"\e[m":"");
+        }
+    } else {
+        if(dec) {
+            dynarec_log(LOG_NONE, "%s%p: %s", (box64_dynarec_dump>1)?"\e[1m":"", ip, DecodeX64Trace(dec, inst->addr));
+        } else {
+            dynarec_log(LOG_NONE, "%s%p: ", (box64_dynarec_dump>1)?"\e[1m":"", ip);
+            for(int i=0; i<inst->size; ++i) {    // hex dump of the instruction bytes
+                dynarec_log(LOG_NONE, "%02X ", ip[i]);
+            }
+            dynarec_log(LOG_NONE, " %s", name);
+        }
+        // print Call function name if possible
+        if(ip[0]==0xE8 || ip[0]==0xE9) { // Call / Jmp
+            uintptr_t nextaddr = (uintptr_t)ip + 5 + *((int32_t*)(ip+1));    // end of the 5-byte instruction + signed 32-bit displacement
+            printFunctionAddr(nextaddr, "=> ");
+        } else if(ip[0]==0xFF) {
+            if(ip[1]==0x25) {    // FF 25: 6-byte form carrying a 32-bit displacement
+                uintptr_t nextaddr = (uintptr_t)ip + 6 + *((int32_t*)(ip+2));    // end of instruction + displacement; the computed address itself is printed, nothing is read from it
+                printFunctionAddr(nextaddr, "=> ");
+            }
+        }
+        // end of line and colors
+        dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":"");
+    }
+}
+
+void add_next(dynarec_arm_t *dyn, uintptr_t addr) {    // Record addr in dyn->next[] unless it is already listed
+    if(dyn->next_sz == dyn->next_cap) {    // array full: grow by 16 entries (done before the duplicate check)
+        dyn->next_cap += 16;
+        dyn->next = (uintptr_t*)realloc(dyn->next, dyn->next_cap*sizeof(uintptr_t));    // NOTE(review): result is not checked; on failure the old buffer is lost and next becomes NULL
+    }
+    for(int i=0; i<dyn->next_sz; ++i)    // linear search for an existing entry
+        if(dyn->next[i]==addr)
+            return;
+    dyn->next[dyn->next_sz++] = addr;
+}
+uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr) {    // Return the smallest recorded address that is >= addr (0 if none), dropping the ones below addr
+    // get the closest address, ignoring (and removing) addresses before addr
+    uintptr_t best = 0;
+    int i = 0;
+    while((i<dyn->next_sz) && (best!=addr)) {    // an exact match cannot be beaten, so the scan stops there
+        if(dyn->next[i]<addr) { // remove the address, it's before current address
+            memmove(dyn->next+i, dyn->next+i+1, (dyn->next_sz-i-1)*sizeof(uintptr_t));    // close the gap; i is not advanced because the next entry slid into slot i
+            --dyn->next_sz;
+        } else {
+            if((dyn->next[i]<best) || !best)    // best==0 means "nothing selected yet"
+                best = dyn->next[i];
+            ++i;
+        }
+    }
+    return best;
+}
+#define PK(A) (*((uint8_t*)(addr+(A))))
+int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n)    // Return 1 when the n bytes at addr consist only of the NOP encodings tested below, 0 otherwise (dyn is only passed along)
+{
+    if(!n)    // every byte has been consumed: it was all NOPs
+        return 1;
+    if(PK(0)==0x90)    // single-byte 0x90
+        return is_nops(dyn, addr+1, n-1);
+    if(n>1 && PK(0)==0x66)  // opcode starts with a 0x66 prefix and more bytes follow, so it *can* be a NOP
+        return is_nops(dyn, addr+1, n-1);
+    if(n>2 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x00)    // 3-byte 0F 1F 00 form
+        return is_nops(dyn, addr+3, n-3);
+    if(n>2 && PK(0)==0x8d && PK(1)==0x76 && PK(2)==0x00)    // lea esi, [esi]
+        return is_nops(dyn, addr+3, n-3);
+    if(n>3 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x40 && PK(3)==0x00)    // 4-byte 0F 1F 40 00 form
+        return is_nops(dyn, addr+4, n-4);
+    if(n>3 && PK(0)==0x8d && PK(1)==0x74 && PK(2)==0x26 && PK(3)==0x00)    // 4-byte 8D 74 26 00 form
+        return is_nops(dyn, addr+4, n-4);
+    if(n>4 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x44 && PK(3)==0x00 && PK(4)==0x00)    // 5-byte 0F 1F 44 00 00 form
+        return is_nops(dyn, addr+5, n-5);
+    if(n>5 && PK(0)==0x8d && PK(1)==0xb6 && PK(2)==0x00 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00)    // 6-byte 8D B6 + four zero bytes
+        return is_nops(dyn, addr+6, n-6);
+    if(n>6 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x80 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00)    // 7-byte 0F 1F 80 + four zero bytes
+        return is_nops(dyn, addr+7, n-7);
+    if(n>6 && PK(0)==0x8d && PK(1)==0xb4 && PK(2)==0x26 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00) // lea esi, [esi+0]
+        return is_nops(dyn, addr+7, n-7);
+    if(n>7 && PK(0)==0x0f && PK(1)==0x1f && PK(2)==0x84 && PK(3)==0x00 && PK(4)==0x00 && PK(5)==0x00 && PK(6)==0x00 && PK(7)==0x00)    // 8-byte 0F 1F 84 + five zero bytes
+        return is_nops(dyn, addr+8, n-8);
+    return 0;    // anything else is not a recognized NOP
+}
+
+// return the size of the next instruction, or -1 when it is unknown
+// (only a subset of opcodes is handled; prefixes other than 66 90 are not)
+int next_instruction(dynarec_arm_t *dyn, uintptr_t addr)
+{
+    uint8_t opcode = PK(0);
+    uint8_t nextop;
+    switch (opcode) {
+        case 0x66:
+            opcode = PK(1);
+            switch(opcode) {
+                case 0x90:    // 66 90: two-byte NOP
+                    return 2;
+            }
+            break;
+        case 0x81:    // opcode + ModRM operand + 32-bit immediate
+            nextop = PK(1);
+            return fakeed(dyn, addr+2, 0, nextop)-addr + 4;
+        case 0x83:    // opcode + ModRM operand + 8-bit immediate
+            nextop = PK(1);
+            return fakeed(dyn, addr+2, 0, nextop)-addr + 1;
+        case 0x84:
+        case 0x85:
+        case 0x88:
+        case 0x89:
+        case 0x8A:
+        case 0x8B:
+        case 0x8C:
+        case 0x8D:
+        case 0x8E:
+        case 0x8F:    // opcode + ModRM operand, no immediate
+            nextop = PK(1);
+            return fakeed(dyn, addr+2, 0, nextop)-addr;
+        case 0x50:
+        case 0x51:
+        case 0x52:
+        case 0x53:
+        case 0x54:
+        case 0x55:
+        case 0x56:
+        case 0x57:
+        case 0x58:
+        case 0x59:
+        case 0x5A:
+        case 0x5B:
+        case 0x5C:
+        case 0x5D:
+        case 0x5E:
+        case 0x5F:
+        case 0x90:
+        case 0x91:
+        case 0x92:
+        case 0x93:
+        case 0x94:
+        case 0x95:
+        case 0x96:
+        case 0x97:
+        case 0x98:
+        case 0x99:
+        case 0x9B:
+        case 0x9C:
+        case 0x9D:
+        case 0x9E:
+        case 0x9F:    // single-byte opcodes
+            return 1;
+        case 0xA0:
+        case 0xA1:
+        case 0xA2:
+        case 0xA3:    // MOV with a memory offset: in 64-bit mode the offset is 8 bytes wide (no 0x67 prefix handled here)
+            return 9;
+        case 0xB0:
+        case 0xB1:
+        case 0xB2:
+        case 0xB3:
+        case 0xB4:
+        case 0xB5:
+        case 0xB6:
+        case 0xB7:    // opcode + 8-bit immediate
+            return 2;
+        case 0xB8:
+        case 0xB9:
+        case 0xBA:
+        case 0xBB:
+        case 0xBC:
+        case 0xBD:
+        case 0xBE:
+        case 0xBF:    // opcode + 32-bit immediate
+            return 5;
+        case 0xFF:
+            nextop = PK(1);
+            switch((nextop>>3)&7) {    // bits 3..5 of ModRM select the operation
+                case 0: // INC Ed
+                case 1: //DEC Ed
+                case 2: // CALL Ed
+                case 4: // JMP Ed
+                case 6: // Push Ed
+                    return fakeed(dyn, addr+2, 0, nextop)-addr;
+            }
+            break;
+        default:
+            break;
+    }
+    return -1;    // opcode not in the table above
+}
+#undef PK
+
+int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n)    // Return 1 when the n bytes at addr split exactly into instructions next_instruction() can size, 0 otherwise
+{
+    int i = 0;
+    while(i<n) {
+        int j=next_instruction(dyn, addr+i);
+        if(j<=0) return 0;    // unknown instruction (-1): give up
+        i+=j;
+    }
+    return (i==n)?1:0;    // the last instruction must end exactly on byte n
+}
+
+uint32_t needed_flags(dynarec_arm_t *dyn, int ninst, uint32_t setf, int recurse)    // Return the flags that code starting at instruction ninst still needs, out of the tracked set setf; fills need_flags of the instructions it walks
+{
+    if(recurse == 10)    // recursion limit on followed jumps: answer X_PEND
+        return X_PEND;
+    if(ninst == dyn->size)
+        return X_PEND; // no more instructions, or too many jmp loop, stop
+    
+    uint32_t needed = dyn->insts[ninst].x64.use_flags;    // flags read by this instruction
+    if(needed) {
+        setf &= ~needed;    // those flags are accounted for, stop tracking them
+        if(!setf)   // all flags already used, no need to continue
+            return needed;
+    }
+
+    if(!needed && !dyn->insts[ninst].x64.set_flags && !dyn->insts[ninst].x64.jmp_insts) {    // instruction does not use or set flags and has jmp_insts==0
+        int start = ninst;
+        int end = ninst;
+        while(end<dyn->size && !dyn->insts[end].x64.use_flags && !dyn->insts[end].x64.set_flags && !dyn->insts[end].x64.jmp_insts)    // skip the whole run of such instructions
+            ++end;
+        needed = needed_flags(dyn, end, setf, recurse);    // evaluate once at the end of the run
+        for(int i=start; i<end; ++i)    // and give the same answer to every instruction of the run
+            dyn->insts[i].x64.need_flags = needed;
+        return needed;
+    }
+
+    if(dyn->insts[ninst].x64.set_flags && (dyn->insts[ninst].x64.state_flags!=SF_MAYSET)) {    // instruction sets flags (SF_MAYSET ones are excluded here)
+        if((setf & ~dyn->insts[ninst].x64.set_flags) == 0)
+            return needed;    // all done, gives all the flags needed
+        setf |= dyn->insts[ninst].x64.set_flags;    // add new flags to continue
+    }
+
+    int jinst = dyn->insts[ninst].x64.jmp_insts;    // index of the jump target inside the block, -1 when it is outside
+    if(dyn->insts[ninst].x64.jmp) {
+        dyn->insts[ninst].x64.need_flags = (jinst==-1)?X_PEND:needed_flags(dyn, jinst, setf, recurse+1);    // follow the jump (one more recursion level) or answer X_PEND when it leaves the block
+        if(dyn->insts[ninst].x64.use_flags)  // conditional jump
+             dyn->insts[ninst].x64.need_flags |= needed_flags(dyn, ninst+1, setf, recurse);    // also follow the fall-through path
+    } else
+        dyn->insts[ninst].x64.need_flags = needed_flags(dyn, ninst+1, setf, recurse);    // no jump: continue with the next instruction
+    if(dyn->insts[ninst].x64.state_flags==SF_MAYSET)
+        needed |= dyn->insts[ninst].x64.need_flags;    // SF_MAYSET: everything needed later is reported
+    else
+        needed |= (dyn->insts[ninst].x64.need_flags & ~dyn->insts[ninst].x64.set_flags);    // flags set by this instruction are removed from what is reported
+    if(needed == (X_PEND|X_ALL))    // X_PEND together with X_ALL collapses to X_ALL
+        needed = X_ALL;
+    return needed;
+}
+
+instsize_t* addInst(instsize_t* insts, size_t* size, size_t* cap, int x64_size, int arm_size)    // Append the sizes of one instruction to insts (growing it if needed); returns the possibly moved array, *size and *cap are updated
+{
+    // x64 instruction is <16 bytes
+    int toadd;    // number of records to write, driven by the larger of the two sizes
+    if(x64_size>arm_size)
+        toadd = 1 + x64_size/15;
+    else
+        toadd = 1 + arm_size/15;
+    if((*size)+toadd>(*cap)) {    // not enough room: grow to exactly what is needed
+        *cap = (*size)+toadd;
+        insts = (instsize_t*)realloc(insts, (*cap)*sizeof(instsize_t));    // NOTE(review): result is not checked
+    }
+    while(toadd) {    // each record holds at most 15 per field, the remainder goes to the following record(s)
+        if(x64_size>15)
+            insts[*size].x64 = 15;    
+        else
+            insts[*size].x64 = x64_size;
+        x64_size -= insts[*size].x64;
+        if(arm_size>15)
+            insts[*size].nat = 15;
+        else
+            insts[*size].nat = arm_size;
+        arm_size -= insts[*size].nat;
+        ++(*size);
+        --toadd;
+    }
+    return insts;
+}
+
+void arm_pass0(dynarec_arm_t* dyn, uintptr_t addr);
+void arm_pass1(dynarec_arm_t* dyn, uintptr_t addr);
+void arm_pass2(dynarec_arm_t* dyn, uintptr_t addr);
+void arm_pass3(dynarec_arm_t* dyn, uintptr_t addr);
+
+void* FillBlock(dynablock_t* block, uintptr_t addr) {    // Build the native code for the x64 code at addr (passes 0 to 3) and fill 'block'; returns block, or NULL when no code is generated
+    if(addr>=box64_nodynarec_start && addr<box64_nodynarec_end)    // address lies in the range excluded from the dynarec
+        return NULL;
+    // init the helper
+    dynarec_arm_t helper = {0};
+    helper.start = addr;
+    arm_pass0(&helper, addr);
+    if(!helper.size) {    // pass0 counted no instruction: the block is marked done without native code
+        dynarec_log(LOG_DEBUG, "Warning, null-sized dynarec block (%p)\n", (void*)addr);
+        block->done = 1;
+        free(helper.next);
+        return (void*)block;
+    }
+    helper.cap = helper.size+3; // needs epilog handling
+    helper.insts = (instruction_arm64_t*)calloc(helper.cap, sizeof(instruction_arm64_t));
+    // pass 1, addresses, x64 jump addresses, flags
+    arm_pass1(&helper, addr);
+    // calculate barriers
+    uintptr_t start = helper.insts[0].x64.addr;
+    uintptr_t end = helper.insts[helper.size].x64.addr+helper.insts[helper.size].x64.size;    // NOTE(review): reads entry [size], which is inside the size+3 allocation; relies on the passes filling it - confirm
+    for(int i=0; i<helper.size; ++i)
+        if(helper.insts[i].x64.jmp) {
+            uintptr_t j = helper.insts[i].x64.jmp;
+            if(j<start || j>=end)    // target is outside this block
+                helper.insts[i].x64.jmp_insts = -1;
+            else {
+                // find jump address instruction
+                int k=-1;
+                for(int i2=0; i2<helper.size && k==-1; ++i2) {
+                    if(helper.insts[i2].x64.addr==j)
+                        k=i2;
+                }
+                if(k!=-1)   // -1 if not found, mmm, probably wrong, exit anyway
+                    helper.insts[k].x64.barrier = 1;
+                helper.insts[i].x64.jmp_insts = k;
+            }
+        }
+    for(int i=0; i<helper.size; ++i)    // for every flag-setting instruction not processed yet, find which flags later code needs
+        if(helper.insts[i].x64.set_flags && !helper.insts[i].x64.need_flags) {
+            helper.insts[i].x64.need_flags = needed_flags(&helper, i+1, helper.insts[i].x64.set_flags, 0);
+            if((helper.insts[i].x64.need_flags&X_PEND) && (helper.insts[i].x64.state_flags==SF_MAYSET))
+                helper.insts[i].x64.need_flags = X_ALL;
+        }
+    
+    // pass 2, instruction size
+    arm_pass2(&helper, addr);
+    // ok, now allocate mapped memory, with executable flag on
+    int sz = helper.arm_size;    // native size computed by pass2
+    void* p = (void*)AllocDynarecMap(block, sz);
+    if(p==NULL) {
+        dynarec_log(LOG_DEBUG, "AllocDynarecMap(%p, %d) failed, cancelling block\n", block, sz);
+        free(helper.insts);
+        free(helper.next);
+        return NULL;
+    }
+    helper.block = p;
+    helper.arm_start = (uintptr_t)p;
+    if(helper.sons_size) {    // arrays receiving the x64 address / native address of each son
+        helper.sons_x64 = (uintptr_t*)calloc(helper.sons_size, sizeof(uintptr_t));
+        helper.sons_arm = (void**)calloc(helper.sons_size, sizeof(void*));
+    }
+    // pass 3, emit (log emit arm opcode)
+    if(box64_dynarec_dump) {
+        dynarec_log(LOG_NONE, "%s%04d|Emitting %d bytes for %d x64 bytes", (box64_dynarec_dump>1)?"\e[01;36m":"", GetTID(), helper.arm_size, helper.isize); 
+        printFunctionAddr(helper.start, " => ");
+        dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":"");
+    }
+    helper.arm_size = 0;    // reset so the value after pass3 can be compared with the pass2 size
+    arm_pass3(&helper, addr);
+    if(sz!=helper.arm_size) {    // the two passes disagree: dump the x64 bytes with both sizes of each instruction
+        printf_log(LOG_NONE, "BOX64: Warning, size difference in block between pass2 (%d) & pass3 (%d)!\n", sz, helper.arm_size);
+        uint8_t *dump = (uint8_t*)helper.start;
+        printf_log(LOG_NONE, "Dump of %d x64 opcodes:\n", helper.size);
+        for(int i=0; i<helper.size; ++i) {
+            printf_log(LOG_NONE, "%p:", dump);
+            for(; dump<(uint8_t*)helper.insts[i+1].x64.addr; ++dump)
+                printf_log(LOG_NONE, " %02X", *dump);
+            printf_log(LOG_NONE, "\t%d -> %d\n", helper.insts[i].size2, helper.insts[i].size);
+        }
+        printf_log(LOG_NONE, " ------------\n");
+    }
+    // all done...
+    __clear_cache(p, p+sz);   // need to clear the cache before execution...
+    // keep size of instructions for signal handling
+    {
+        size_t cap = 1;    // 1 for the end mark, plus an upper bound on the records of each instruction
+        for(int i=0; i<helper.size; ++i)
+            cap += 1 + ((helper.insts[i].x64.size>helper.insts[i].size)?helper.insts[i].x64.size:helper.insts[i].size)/15;
+        size_t size = 0;
+        block->instsize = (instsize_t*)calloc(cap, sizeof(instsize_t));
+        for(int i=0; i<helper.size; ++i)
+            block->instsize = addInst(block->instsize, &size, &cap, helper.insts[i].x64.size, helper.insts[i].size/4);    // native size is stored divided by 4
+        block->instsize = addInst(block->instsize, &size, &cap, 0, 0);    // add a "end of block" mark, just in case
+    }
+    // ok, free the helper now
+    free(helper.insts);
+    free(helper.next);
+    block->size = sz;
+    block->isize = helper.size;
+    block->block = p;
+    block->need_test = 0;
+    //block->x64_addr = (void*)start;
+    block->x64_size = end-start;
+    if(box64_dynarec_largest<block->x64_size)    // keep track of the largest block seen
+        box64_dynarec_largest = block->x64_size;
+    block->hash = X31_hash_code(block->x64_addr, block->x64_size);
+    // fill sons if any
+    dynablock_t** sons = NULL;
+    int sons_size = 0;
+    if(helper.sons_size) {
+        sons = (dynablock_t**)calloc(helper.sons_size, sizeof(dynablock_t*));
+        for (int i=0; i<helper.sons_size; ++i) {
+            int created = 1;    // NOTE(review): presumably cleared by AddNewDynablock() when the block already existed - confirm
+            dynablock_t *son = AddNewDynablock(block->parent, helper.sons_x64[i], &created);
+            if(created) {    // avoid breaking a working block!
+                son->block = helper.sons_arm[i];
+                son->x64_addr = (void*)helper.sons_x64[i];
+                son->x64_size = end-helper.sons_x64[i];    // a son covers from its own address to the end of the father
+                if(!son->x64_size) {printf_log(LOG_NONE, "Warning, son with null x64 size! (@%p / ARM=%p)\n", son->x64_addr, son->block);}
+                son->father = block;
+                son->done = 1;
+                sons[sons_size++] = son;
+                if(!son->parent)
+                    son->parent = block->parent;
+            }
+        }
+        if(sons_size) {
+            block->sons = sons;
+            block->sons_size = sons_size;
+        } else
+            free(sons);    // no son was created: nothing to keep
+    }
+    free(helper.sons_x64);
+    free(helper.sons_arm);
+    block->done = 1;
+    return (void*)block;
+}
\ No newline at end of file
diff --git a/src/dynarec/dynarec_arm64_functions.c b/src/dynarec/dynarec_arm64_functions.c
new file mode 100755
index 00000000..784739ac
--- /dev/null
+++ b/src/dynarec/dynarec_arm64_functions.c
@@ -0,0 +1,354 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <errno.h>
+#include <string.h>
+#include <math.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "tools/bridge_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "emu/x87emu_private.h"
+#include "x64trace.h"
+#include "signals.h"
+#include "dynarec_arm64.h"
+#include "dynarec_arm64_private.h"
+#include "dynarec_arm64_functions.h"
+
+void arm_fstp(x64emu_t* emu, void* p)    // Write ST0 to p as a 10-byte long double
+{
+    if(ST0.q!=STld(0).ref)    // ST0 no longer matches the value recorded with the saved long double (see arm_fld)
+        D2LD(&ST0.d, p);    // convert the double
+    else
+        memcpy(p, &STld(0).ld, 10);    // unchanged since the load: copy the saved 10 bytes back
+}
+
+void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n)    // Debug helper: log value reg of register number n, in hex and decimal (emu is unused)
+{
+    dynarec_log(LOG_DEBUG, "R%ld=0x%lx (%ld)\n", n, reg, reg);    // NOTE(review): %ld is used with unsigned uintptr_t arguments
+}
+
+void arm_f2xm1(x64emu_t* emu)    // ST0 = 2^ST0 - 1
+{
+    ST0.d = exp2(ST0.d) - 1.0;
+}
+void arm_fyl2x(x64emu_t* emu)    // ST(1) = ST(1) * log2(ST0)
+{
+    ST(1).d = log2(ST0.d)*ST(1).d;
+}
+void arm_ftan(x64emu_t* emu)    // ST0 = tan(ST0)
+{
+    ST0.d = tan(ST0.d);
+}
+void arm_fpatan(x64emu_t* emu)    // ST1 = atan2(ST1, ST0)
+{
+    ST1.d = atan2(ST1.d, ST0.d);
+}
+void arm_fxtract(x64emu_t* emu)    // Split ST1: ST1 is divided by 2^exponent, ST0 receives the exponent
+{
+    int32_t tmp32s = (ST1.q&0x7ff0000000000000LL)>>52;    // exponent field (bits 52..62) of the double in ST1
+    tmp32s -= 1023;    // remove the bias
+    ST1.d /= exp2(tmp32s);
+    ST0.d = tmp32s;
+}
+void arm_fprem(x64emu_t* emu)    // ST0 = ST0 - ST1*q, with q the quotient converted to a 32-bit integer
+{
+    int32_t tmp32s = ST0.d / ST1.d;
+    ST0.d -= ST1.d * tmp32s;
+    emu->sw.f.F87_C2 = 0;
+    emu->sw.f.F87_C0 = (tmp32s&1);    // C0, C3, C1 receive bits 0, 1, 2 of the quotient
+    emu->sw.f.F87_C3 = ((tmp32s>>1)&1);
+    emu->sw.f.F87_C1 = ((tmp32s>>2)&1);
+}
+void arm_fyl2xp1(x64emu_t* emu)    // ST(1) = ST(1) * log2(ST0 + 1)
+{
+    ST(1).d = log2(ST0.d + 1.0)*ST(1).d;
+}
+void arm_fsincos(x64emu_t* emu)    // From the angle in ST1: sine goes to ST1, cosine to ST0
+{
+    sincos(ST1.d, &ST1.d, &ST0.d);
+}
+void arm_frndint(x64emu_t* emu)    // ST0 = fpu_round(ST0)
+{
+    ST0.d = fpu_round(emu, ST0.d);
+}
+void arm_fscale(x64emu_t* emu)    // ST0 = ST0 * 2^trunc(ST1)
+{
+    ST0.d *= exp2(trunc(ST1.d));
+}
+void arm_fsin(x64emu_t* emu)    // ST0 = sin(ST0)
+{
+    ST0.d = sin(ST0.d);
+}
+void arm_fcos(x64emu_t* emu)    // ST0 = cos(ST0)
+{
+    ST0.d = cos(ST0.d);
+}
+
+void arm_fbld(x64emu_t* emu, uint8_t* ed)    // Thin wrapper around fpu_fbld()
+{
+    fpu_fbld(emu, ed);
+}
+
+void arm_fild64(x64emu_t* emu, int64_t* ed)    // Load the 64-bit integer at ed into ST0, keeping an exact copy of it
+{
+    int64_t tmp;
+    memcpy(&tmp, ed, sizeof(tmp));    // memcpy: ed may be unaligned
+    ST0.d = tmp;
+    STll(0).ll = tmp;    // exact integer kept aside...
+    STll(0).ref = ST0.q;    // ...with the bits of ST0 it corresponds to (checked by arm_fistp64)
+}
+
+void arm_fbstp(x64emu_t* emu, uint8_t* ed)    // Thin wrapper around fpu_fbst()
+{
+    fpu_fbst(emu, ed);
+}
+
+void arm_fistp64(x64emu_t* emu, int64_t* ed)    // Store ST0 at ed as a 64-bit integer
+{
+    // memcpy is used to avoid alignment issues
+    if(STll(0).ref==ST(0).q) {    // ST0 still holds the value recorded with the saved integer: store that exact integer
+        memcpy(ed, &STll(0).ll, sizeof(int64_t));
+    } else {
+        int64_t tmp;
+        if(isgreater(ST0.d, (double)(int64_t)0x7fffffffffffffffLL) || isless(ST0.d, (double)(int64_t)0x8000000000000000LL) || !isfinite(ST0.d))    // out of int64 range, infinite or NaN
+            tmp = 0x8000000000000000LL;
+        else
+            tmp = fpu_round(emu, ST0.d);
+        memcpy(ed, &tmp, sizeof(tmp));
+    }
+}
+
+void arm_fistt64(x64emu_t* emu, int64_t* ed)    // Store ST0 at ed as a 64-bit integer using the C double-to-integer conversion
+{
+    // memcpy is used to avoid alignment issues
+    int64_t tmp = ST0.d;    // NOTE(review): no range / NaN check here, unlike arm_fistp64
+    memcpy(ed, &tmp, sizeof(tmp));
+}
+
+void arm_fld(x64emu_t* emu, uint8_t* ed)    // Load the 10-byte long double at ed into ST0
+{
+    memcpy(&STld(0).ld, ed, 10);    // keep the original 10 bytes
+    LD2D(&STld(0), &ST(0).d);    // convert them to the double held in ST0
+    STld(0).ref = ST0.q;    // remember the resulting bits (checked by arm_fstp)
+}
+
+void arm_ud(x64emu_t* emu)    // Emit SIGILL with the current RIP as address
+{
+    emit_signal(emu, SIGILL, (void*)R_RIP, 0);
+}
+
+void arm_fsave(x64emu_t* emu, uint8_t* ed)    // Save the FPU state at ed: environment first, then the 8 registers as 10-byte long doubles
+{
+    fpu_savenv(emu, (char*)ed, 0);
+
+    uint8_t* p = ed;
+    p += 28;    // registers are stored 28 bytes into the buffer
+    for (int i=0; i<8; ++i) {
+        D2LD(&ST(i).d, p);    // register -> memory (same direction as arm_fstp)
+        p+=10;
+    }
+}
+void arm_frstor(x64emu_t* emu, uint8_t* ed)    // Restore the FPU state from ed: environment first, then the 8 registers stored as 10-byte long doubles
+{
+    fpu_loadenv(emu, (char*)ed, 0);
+
+    uint8_t* p = ed;
+    p += 28;    // registers are stored 28 bytes into the buffer
+    for (int i=0; i<8; ++i) {
+        LD2D(p, &ST(i).d);    // memory -> register (same direction as arm_fld)
+        p+=10;
+    }
+
+}
+
+void arm_fprem1(x64emu_t* emu)    // ST0 = ST0 - ST1*q, with q the quotient rounded with round()
+{
+    // simplified version
+    int32_t tmp32s = round(ST0.d / ST1.d);
+    ST0.d -= ST1.d*tmp32s;
+    emu->sw.f.F87_C2 = 0;
+    emu->sw.f.F87_C0 = (tmp32s&1);    // C0, C3, C1 receive bits 0, 1, 2 of the quotient
+    emu->sw.f.F87_C3 = ((tmp32s>>1)&1);
+    emu->sw.f.F87_C1 = ((tmp32s>>2)&1);
+}
+
+
+// Get a FPU single scratch reg
+int fpu_get_scratch_single(dynarec_arm_t* dyn)
+{
+    return dyn->fpu_scratch++;  // return an Sx
+}
+// Get a FPU double scratch reg
+int fpu_get_scratch_double(dynarec_arm_t* dyn)
+{
+    int i = (dyn->fpu_scratch+1)&(~1);    // round the counter up to an even value
+    dyn->fpu_scratch = i+2;    // a double takes two counter units
+    return i/2; // return a Dx
+}
+// Get a FPU quad scratch reg
+int fpu_get_scratch_quad(dynarec_arm_t* dyn)
+{
+    if(dyn->fpu_scratch>4) {    // counter already above 4: fall back to one extra quad taken from the regular pool
+        if(dyn->fpu_extra_qscratch) {
+            dynarec_log(LOG_NONE, "Warning, Extra QScratch slot taken and need another one!\n");    // only a warning: the same register is returned again
+        } else
+            dyn->fpu_extra_qscratch = fpu_get_reg_quad(dyn);
+        return dyn->fpu_extra_qscratch;
+    }
+    int i = (dyn->fpu_scratch+3)&(~3);    // round the counter up to a multiple of 4
+    dyn->fpu_scratch = i+4;    // a quad takes four counter units
+    return i/2; // Return a Dx, not a Qx
+}
+// Reset scratch regs counter
+void fpu_reset_scratch(dynarec_arm_t* dyn)
+{
+    dyn->fpu_scratch = 0;
+    if(dyn->fpu_extra_qscratch) {    // give the extra quad back to the regular pool
+        fpu_free_reg_quad(dyn, dyn->fpu_extra_qscratch);
+        dyn->fpu_extra_qscratch = 0;
+    }
+}
+#define FPUFIRST    8
+// Get a FPU double reg
+int fpu_get_reg_double(dynarec_arm_t* dyn)
+{
+    // TODO: check upper limit?
+    int i=0;
+    while (dyn->fpuused[i]) ++i;    // first free slot; NOTE(review): nothing stops the scan at the end of fpuused[]
+    dyn->fpuused[i] = 1;
+    return i+FPUFIRST; // return a Dx
+}
+// Free a FPU double reg
+void fpu_free_reg_double(dynarec_arm_t* dyn, int reg)
+{
+    // TODO: check upper limit?
+    int i=reg-FPUFIRST;    // back from register number to slot index
+    dyn->fpuused[i] = 0;
+}
+// Get a FPU quad reg
+int fpu_get_reg_quad(dynarec_arm_t* dyn)
+{
+    int i=0;
+    while (dyn->fpuused[i] || dyn->fpuused[i+1]) i+=2;    // first even-aligned pair of free slots; NOTE(review): no upper bound either
+    dyn->fpuused[i] = dyn->fpuused[i+1] = 1;
+    return i+FPUFIRST; // Return a Dx, not a Qx
+}
+// Free a FPU quad reg
+void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg)
+{
+    int i=reg-FPUFIRST;
+    dyn->fpuused[i] = dyn->fpuused[i+1] = 0;    // release both slots of the pair
+}
+// Reset fpu regs counter
+void fpu_reset_reg(dynarec_arm_t* dyn)
+{
+    dyn->fpu_reg = 0;
+    for (int i=0; i<24; ++i)    // mark all 24 slots as free
+        dyn->fpuused[i]=0;
+}
+
+#define F8      *(uint8_t*)(addr++)
+#define F32     *(uint32_t*)(addr+=4, addr-4)
+// Tell whether ED will have the correct parity (alignment). Does not emit anything. Parity is 2 for DWORD or 3 for QWORD
+int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity)    // returns 1 only for the forms checked below, 0 otherwise
+{
+
+    uint32_t tested = (1<<parity)-1;    // mask of the low bits that must be zero
+    if((nextop&0xC0)==0xC0)
+        return 0;   // direct register, no parity...
+    if(!(nextop&0xC0)) {    // top two bits of nextop are 0
+        if((nextop&7)==4) {    // a SIB byte follows
+            uint8_t sib = F8;
+            int sib_reg = (sib>>3)&7;    // index field of the SIB byte
+            if((sib&0x7)==5) {    // base field is 5: a 32-bit constant follows
+                uint32_t tmp = F32;
+                if (sib_reg!=4) {
+                    // if XXXXXX+reg<<N then check parity of XXXXX and N should be enough
+                    return ((tmp&tested)==0 && (sib>>6)>=parity)?1:0;
+                } else {
+                    // just a constant...
+                    return (tmp&tested)?0:1;
+                }
+            } else {
+                if(sib_reg==4 && parity<3)
+                    return 0;   // simple [reg]
+                // don't try [reg1 + reg2<<N], unless reg1 is ESP
+                return ((sib&0x7)==4 && (sib>>6)>=parity)?1:0;
+            }
+        } else if((nextop&7)==5) {    // only a 32-bit constant follows
+            uint32_t tmp = F32;
+            return (tmp&tested)?0:1;
+        } else {
+            return 0;
+        }
+    } else {
+        return 0; //Form [reg1 + reg2<<N + XXXXXX]
+    }
+}
+
+// Do the GETED, but don't emit anything...
+uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop)    // returns addr advanced past the SIB / displacement bytes that nextop implies (dyn and ninst are unused)
+{
+    if((nextop&0xC0)==0xC0)    // register operand: no extra byte
+        return addr;
+    if(!(nextop&0xC0)) {    // top two bits of nextop are 0
+        if((nextop&7)==4) {
+            uint8_t sib = F8;    // consume the SIB byte
+            if((sib&0x7)==5) {    // base field is 5: 4 more bytes follow
+                addr+=4;
+            }
+        } else if((nextop&7)==5) {    // 4-byte displacement only
+            addr+=4;
+        }
+    } else {
+        if((nextop&7)==4) {    // SIB byte
+            ++addr;
+        }
+        if(nextop&0x80) {    // 4-byte displacement
+            addr+=4;
+        } else {    // 1-byte displacement
+            ++addr;
+        }
+    }
+    return addr;
+}
+#undef F8
+#undef F32
+
+int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn)    // Return 1 when addr (or the target of a jmp [rip+disp32] found there) is a bridge to a native function; fills *calladdress and *retn when they are not NULL
+{
+#define PK(a)       *(uint8_t*)(addr+a)
+#define PK32(a)     *(int32_t*)(addr+a)
+
+    if(!addr)
+        return 0;
+    if(PK(0)==0xff && PK(1)==0x25) {  // jmp [rip+disp32] (RIP relative in 64-bit mode), maybe the GOT
+        uintptr_t a1 = addr + 6 + PK32(2);   // slot address = end of the 6-byte instruction + signed displacement; need to add a check to see if the address is from the GOT !
+        addr = *(uintptr_t*)a1; 
+    }
+    if(addr<0x10000)    // too low, that is suspicious
+        return 0;
+    onebridge_t *b = (onebridge_t*)(addr);
+    if(b->CC==0xCC && b->S=='S' && b->C=='C' && b->w!=(wrapper_t)0 && b->f!=(uintptr_t)PltResolver) {    // bridge marker present, wrapper set, and not the PLT resolver
+        // found !
+        if(retn) *retn = (b->C3==0xC2)?b->N:0;    // N is reported only when the C3 byte is 0xC2, else 0
+        if(calladdress) *calladdress = addr+1;
+        return 1;
+    }
+    return 0;
+#undef PK32
+#undef PK
+}
+
diff --git a/src/dynarec/dynarec_arm64_functions.h b/src/dynarec/dynarec_arm64_functions.h
new file mode 100755
index 00000000..d932aa4d
--- /dev/null
+++ b/src/dynarec/dynarec_arm64_functions.h
@@ -0,0 +1,61 @@
+#ifndef __DYNAREC_ARM_FUNCTIONS_H__
+#define __DYNAREC_ARM_FUNCTIONS_H__
+
+typedef struct x64emu_s x64emu_t;
+
+void arm_fstp(x64emu_t* emu, void* p);
+
+void arm_print_armreg(x64emu_t* emu, uintptr_t reg, uintptr_t n);
+
+void arm_f2xm1(x64emu_t* emu);
+void arm_fyl2x(x64emu_t* emu);
+void arm_ftan(x64emu_t* emu);
+void arm_fpatan(x64emu_t* emu);
+void arm_fxtract(x64emu_t* emu);
+void arm_fprem(x64emu_t* emu);
+void arm_fyl2xp1(x64emu_t* emu);
+void arm_fsincos(x64emu_t* emu);
+void arm_frndint(x64emu_t* emu);
+void arm_fscale(x64emu_t* emu);
+void arm_fsin(x64emu_t* emu);
+void arm_fcos(x64emu_t* emu);
+void arm_fbld(x64emu_t* emu, uint8_t* ed);
+void arm_fild64(x64emu_t* emu, int64_t* ed);
+void arm_fbstp(x64emu_t* emu, uint8_t* ed);
+void arm_fistp64(x64emu_t* emu, int64_t* ed);
+void arm_fistt64(x64emu_t* emu, int64_t* ed);
+void arm_fld(x64emu_t* emu, uint8_t* ed);
+void arm_fsave(x64emu_t* emu, uint8_t* ed);
+void arm_frstor(x64emu_t* emu, uint8_t* ed);
+void arm_fprem1(x64emu_t* emu);
+
+void arm_ud(x64emu_t* emu);
+
+// Get an FPU single scratch reg
+int fpu_get_scratch_single(dynarec_arm_t* dyn);
+// Get an FPU double scratch reg
+int fpu_get_scratch_double(dynarec_arm_t* dyn);
+// Get an FPU quad scratch reg
+int fpu_get_scratch_quad(dynarec_arm_t* dyn);
+// Reset scratch regs counter
+void fpu_reset_scratch(dynarec_arm_t* dyn);
+// Get an FPU double reg
+int fpu_get_reg_double(dynarec_arm_t* dyn);
+// Free a FPU double reg
+void fpu_free_reg_double(dynarec_arm_t* dyn, int reg);
+// Get an FPU quad reg
+int fpu_get_reg_quad(dynarec_arm_t* dyn);
+// Free a FPU quad reg
+void fpu_free_reg_quad(dynarec_arm_t* dyn, int reg);
+// Reset fpu regs counter
+void fpu_reset_reg(dynarec_arm_t* dyn);
+
+// Tell whether ED will have the correct parity (alignment). Does not emit anything. Parity is 2 for DWORD or 3 for QWORD
+int getedparity(dynarec_arm_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity);
+// Do the GETED, but don't emit anything...
+uintptr_t fakeed(dynarec_arm_t* dyn, uintptr_t addr, int ninst, uint8_t nextop);
+
+// Is the code pointed to by addr a native call? And if yes, to what function?
+int isNativeCall(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn);
+
+#endif //__DYNAREC_ARM_FUNCTIONS_H__
\ No newline at end of file
diff --git a/src/dynarec/dynarec_arm64_private.h b/src/dynarec/dynarec_arm64_private.h
new file mode 100755
index 00000000..8101cc2b
--- /dev/null
+++ b/src/dynarec/dynarec_arm64_private.h
@@ -0,0 +1,56 @@
+#ifndef __DYNAREC_ARM_PRIVATE_H_
+#define __DYNAREC_ARM_PRIVATE_H_
+
+#include "dynarec_private.h"
+
+typedef struct x64emu_s x64emu_t;
+
+typedef struct instruction_arm_s {  // one record per instruction: x64 info plus data about the arm code emitted for it
+    instruction_x64_t   x64;
+    uintptr_t           address;    // (start) address of the arm emitted instruction
+    uintptr_t           epilog;     // epilog of current instruction (can be start of next, or of barrier stuff)
+    int                 size;       // size of the arm emitted instruction
+    int                 size2;      // size of the arm emitted instruction after pass2
+    uintptr_t           mark, mark2, mark3;
+    uintptr_t           markf;
+    uintptr_t           markseg;
+    uintptr_t           marklock;
+    int                 pass2choice;// value for choices that are fixed on pass2 for pass3
+    uintptr_t           natcall;    // presumably the call address reported by isNativeCall() -- TODO confirm
+    int                 retn;       // presumably the retn value reported by isNativeCall() -- TODO confirm
+} instruction_arm64_t;
+
+typedef struct dynarec_arm_s {
+    instruction_arm64_t *insts;     // presumably an array of cap entries with size in use -- TODO confirm
+    int32_t             size;
+    int32_t             cap;
+    uintptr_t           start;      // start of the block
+    uint32_t            isize;      // size in bytes of x64 instructions included
+    void*               block;      // memory pointer where next instruction is emitted
+    uintptr_t           arm_start;  // start of the arm code
+    int                 arm_size;   // size of emitted arm code
+    int                 state_flags;// actual state for on-demand flags
+    int                 x87cache[8];// cache status for the 8 x87 registers behind the fpu stack
+    int                 x87reg[8];  // reg used for x87cache entry
+    int                 mmxcache[8];// cache status for the 8 MMX registers
+    int                 ssecache[8];// cache status for the 8 SSE(2) registers; NOTE(review): x86-64 exposes 16 XMM registers -- confirm 8 entries is intended
+    int                 fpuused[24];// all 8..31 double reg from fpu, used by x87, sse and mmx
+    int                 x87stack;   // cache stack counter
+    int                 fpu_scratch;// scratch counter
+    int                 fpu_extra_qscratch; // some opcodes need an extra quad scratch register
+    int                 fpu_reg;    // x87/sse/mmx reg counter
+    int                 dfnone;     // if deferred flags is already set to df_none
+    uintptr_t*          next;       // variable array of "next" jump address
+    int                 next_sz;    // presumably the number of entries used in next -- TODO confirm
+    int                 next_cap;   // presumably the allocated capacity of next -- TODO confirm
+    uintptr_t*          sons_x64;   // the x64 address of potential dynablock sons
+    void**              sons_arm;   // the arm address of potential dynablock sons
+    int                 sons_size;  // number of potential dynablock sons
+} dynarec_arm_t;
+
+void add_next(dynarec_arm_t *dyn, uintptr_t addr);
+uintptr_t get_closest_next(dynarec_arm_t *dyn, uintptr_t addr);
+int is_nops(dynarec_arm_t *dyn, uintptr_t addr, int n);
+int is_instructions(dynarec_arm_t *dyn, uintptr_t addr, int n);
+
+#endif //__DYNAREC_ARM_PRIVATE_H_
\ No newline at end of file
diff --git a/src/dynarec/dynarec_private.h b/src/dynarec/dynarec_private.h
new file mode 100755
index 00000000..66eb3bfa
--- /dev/null
+++ b/src/dynarec/dynarec_private.h
@@ -0,0 +1,35 @@
+#ifndef __DYNAREC_PRIVATE_H_
+#define __DYNAREC_PRIVATE_H_
+
+// all flags for the use_flags field
+#define X_CF    (1<<0)
+#define X_PF    (1<<1)
+#define X_AF    (1<<2)
+#define X_ZF    (1<<3)
+#define X_SF    (1<<4)
+#define X_OF    (1<<5)
+#define X_ALL   ((1<<6)-1)
+#define X_PEND  (0x1000)
+
+// all state flags
+#define SF_UNKNOWN  0
+#define SF_SET      1
+#define SF_PENDING  2
+#define SF_MAYSET   3
+#define SF_SUBSET   4
+
+typedef struct instruction_x64_s {
+    uintptr_t   addr;   // address of the instruction
+    int32_t     size;   // size of the instruction
+    int         barrier; // next instruction is a jump point, so no optim allowed
+    uintptr_t   jmp;    // offset to jump to, even if conditional (0 if not), no relative offset here
+    int         jmp_insts;  // instruction to jump to (-1 if out of the block)
+    uint32_t    use_flags;  // 0 or combination of X_?F
+    uint32_t    set_flags;  // 0 or combination of X_?F
+    uint32_t    need_flags; // calculated
+    int         state_flags; // One of SF_XXX state
+} instruction_x64_t;
+
+void printf_x64_instruction(zydis_dec_t* dec, instruction_x64_t* inst, const char* name);
+
+#endif //__DYNAREC_PRIVATE_H_
diff --git a/src/elfs/elfloader.c b/src/elfs/elfloader.c
index a9335de3..f821fd4b 100755
--- a/src/elfs/elfloader.c
+++ b/src/elfs/elfloader.c
@@ -25,7 +25,6 @@
 #include "x64emu.h"
 #include "box64stack.h"
 #include "callback.h"
-//#include "dynarec.h"
 #include "box64stack.h"
 #include "custommem.h"
 #include "wine_tools.h"
@@ -293,7 +292,7 @@ int LoadElfMemory(FILE* f, box64context_t* context, elfheader_t* head)
                 }
             }
 #ifdef DYNAREC
-            if(e->p_flags & PF_X) {
+            if(box64_dynarec && (e->p_flags & PF_X)) {
                 dynarec_log(LOG_DEBUG, "Add ELF eXecutable Memory %p:%p\n", dest, (void*)e->p_memsz);
                 addDBFromAddressRange((uintptr_t)dest, e->p_memsz);
             }
@@ -334,7 +333,8 @@ int ReloadElfMemory(FILE* f, box64context_t* context, elfheader_t* head)
             if(e->p_filesz) {
                 ssize_t r = -1;
                 #ifdef DYNAREC
-                unprotectDB((uintptr_t)dest, e->p_memsz);
+                if(box64_dynarec)
+                    unprotectDB((uintptr_t)dest, e->p_memsz);
                 #endif
                 if((r=fread(dest, e->p_filesz, 1, f))!=1) {
                     printf_log(LOG_NONE, "Fail to (re)read PT_LOAD part #%d (dest=%p, size=%ld, return=%ld, feof=%d/ferror=%d/%s)\n", i, dest, e->p_filesz, r, feof(f), ferror(f), strerror(ferror(f)));
@@ -577,7 +577,7 @@ int RelocateElfRELA(lib_t *maplib, lib_t *local_maplib, elfheader_t* head, int c
                 if(offs) {
                     // add r_addend to p?
                     printf_log(LOG_DUMP, "Apply R_X86_64_COPY @%p with sym=%s, @%p size=%ld\n", p, symname, (void*)offs, sym->st_size);
-                    memcpy(p, (void*)(offs+rela[i].r_addend), sym->st_size);
+                    memmove(p, (void*)(offs+rela[i].r_addend), sym->st_size);
                 } else {
                     printf_log(LOG_NONE, "Error: Symbol %s not found, cannot apply RELA R_X86_64_COPY @%p (%p) in %s\n", symname, p, *(void**)p, head->name);
                 }
diff --git a/src/emu/x64emu_private.h b/src/emu/x64emu_private.h
index b4415673..05570965 100755
--- a/src/emu/x64emu_private.h
+++ b/src/emu/x64emu_private.h
@@ -10,11 +10,6 @@ typedef struct x64_ucontext_s x64_ucontext_t;
 #define ERR_DIVBY0  2
 #define ERR_ILLEGAL 4
 
-#ifdef DYNAREC
-#define CSTACK      32
-#define CSTACKMASK  31
-#endif
-
 typedef struct forkpty_s {
     void*    amaster;
     void*   name;
diff --git a/src/emu/x64run.c b/src/emu/x64run.c
index 0a393466..5608cc00 100755
--- a/src/emu/x64run.c
+++ b/src/emu/x64run.c
@@ -21,7 +21,7 @@
 #include "bridge.h"
 #include "signals.h"
 #ifdef DYNAREC
-#include "../dynarec/arm_lock_helper.h"
+#include "../dynarec/arm64_lock_helper.h"
 #endif
 
 #include "modrm.h"
@@ -381,15 +381,16 @@ x64emurun:
         case 0x86:                      /* XCHG Eb,Gb */
             nextop = F8;
 #ifdef DYNAREC
-            GET_EB;
+            GETEB(0);
+            GETGB;
             if((nextop&0xC0)==0xC0) { // reg / reg: no lock
                 tmp8u = GB;
                 GB = EB->byte[0];
                 EB->byte[0] = tmp8u;
             } else {
                 do {
-                    tmp8u = arm_lock_read_b(EB);
-                } while(arm_lock_write_b(EB, GB));
+                    tmp8u = arm64_lock_read_b(EB);
+                } while(arm64_lock_write_b(EB, GB));
                 GB = tmp8u;
             }
             // dynarec use need it's own mecanism
@@ -408,21 +409,26 @@ x64emurun:
         case 0x87:                      /* XCHG Ed,Gd */
             nextop = F8;
 #ifdef DYNAREC
-            GET_ED;
+            GETED(0);
+            GETGD;
             if((nextop&0xC0)==0xC0) {
-                tmp32u = GD.dword[0];
-                GD.dword[0] = ED->dword[0];
-                ED->dword[0] = tmp32u;
+                if(rex.w) {
+                    tmp64u = GD->q[0];
+                    GD->q[0] = ED->q[0];
+                    ED->q[0] = tmp64u;
+                } else {
+                    tmp32u = GD->dword[0];
+                    GD->q[0] = ED->dword[0];
+                    ED->q[0] = tmp32u;
+                }
             } else {
-                if(((uintptr_t)ED)&3)
-                {
-                    // not aligned, dont't try to "LOCK"
-                    tmp32u = ED->dword[0];
-                    ED->dword[0] = GD.dword[0];
-                    GD.dword[0] = tmp32u;
+                if(rex.w) {
+                    GD->q[0] = arm64_lock_xchg(ED, GD->q[0]);
                 } else {
-                    // XCHG is supposed to automaticaly LOCK memory bus
-                    GD.dword[0] = arm_lock_xchg(ED, GD.dword[0]);
+                    do {
+                        tmp32u = arm64_lock_read_d(ED);
+                    } while(arm64_lock_write_d(ED, GD->dword[0]));
+                    GD->q[0] = tmp32u;
                 }
             }
 #else
diff --git a/src/emu/x64run0f.c b/src/emu/x64run0f.c
index 8ad1584f..ec0048f4 100644
--- a/src/emu/x64run0f.c
+++ b/src/emu/x64run0f.c
@@ -22,7 +22,7 @@
 #include "bridge.h"

 #include "signals.h"

 #ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

+#include "../dynarec/arm64_lock_helper.h"

 #endif

 

 #include "modrm.h"

diff --git a/src/emu/x64run64.c b/src/emu/x64run64.c
index 20c087ac..efc90e75 100644
--- a/src/emu/x64run64.c
+++ b/src/emu/x64run64.c
@@ -19,10 +19,6 @@
 #include "x87emu_private.h"

 #include "box64context.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64run66.c b/src/emu/x64run66.c
index 0eefff20..377ece8c 100644
--- a/src/emu/x64run66.c
+++ b/src/emu/x64run66.c
@@ -19,9 +19,8 @@
 #include "x87emu_private.h"

 #include "box64context.h"

 #include "bridge.h"

-//#include "signals.h"

 #ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

+#include "../dynarec/arm64_lock_helper.h"

 #endif

 

 #include "modrm.h"

diff --git a/src/emu/x64run660f.c b/src/emu/x64run660f.c
index db4cbe25..abcf0253 100644
--- a/src/emu/x64run660f.c
+++ b/src/emu/x64run660f.c
@@ -19,10 +19,6 @@
 #include "x87emu_private.h"

 #include "box64context.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64run6664.c b/src/emu/x64run6664.c
index f3001290..c61938be 100644
--- a/src/emu/x64run6664.c
+++ b/src/emu/x64run6664.c
@@ -19,10 +19,6 @@
 #include "x87emu_private.h"

 #include "box64context.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64run66d9.c b/src/emu/x64run66d9.c
index fae424ba..e355ca46 100644
--- a/src/emu/x64run66d9.c
+++ b/src/emu/x64run66d9.c
@@ -18,12 +18,7 @@
 #include "x64trace.h"

 #include "x87emu_private.h"

 #include "box64context.h"

-//#include "my_cpuid.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64run66dd.c b/src/emu/x64run66dd.c
index 1ef8e93c..8b2b7326 100644
--- a/src/emu/x64run66dd.c
+++ b/src/emu/x64run66dd.c
@@ -18,12 +18,7 @@
 #include "x64trace.h"

 #include "x87emu_private.h"

 #include "box64context.h"

-//#include "my_cpuid.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64run67.c b/src/emu/x64run67.c
index 1d443611..a956072a 100644
--- a/src/emu/x64run67.c
+++ b/src/emu/x64run67.c
@@ -19,10 +19,6 @@
 #include "x87emu_private.h"

 #include "box64context.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64rund8.c b/src/emu/x64rund8.c
index e0f1017e..60b9e151 100644
--- a/src/emu/x64rund8.c
+++ b/src/emu/x64rund8.c
@@ -19,9 +19,6 @@
 #include "x87emu_private.h"

 #include "box64context.h"

 #include "bridge.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64rund9.c b/src/emu/x64rund9.c
index 253204e8..b7f2df5f 100644
--- a/src/emu/x64rund9.c
+++ b/src/emu/x64rund9.c
@@ -18,12 +18,7 @@
 #include "x64trace.h"

 #include "x87emu_private.h"

 #include "box64context.h"

-//#include "my_cpuid.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64runda.c b/src/emu/x64runda.c
index 65115b0c..70a60222 100644
--- a/src/emu/x64runda.c
+++ b/src/emu/x64runda.c
@@ -19,9 +19,6 @@
 #include "x87emu_private.h"

 #include "box64context.h"

 #include "bridge.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64rundb.c b/src/emu/x64rundb.c
index 772748a6..0cd9c5dd 100644
--- a/src/emu/x64rundb.c
+++ b/src/emu/x64rundb.c
@@ -18,12 +18,7 @@
 #include "x64trace.h"

 #include "x87emu_private.h"

 #include "box64context.h"

-//#include "my_cpuid.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64rundd.c b/src/emu/x64rundd.c
index 73678e2b..ce6081cc 100644
--- a/src/emu/x64rundd.c
+++ b/src/emu/x64rundd.c
@@ -19,9 +19,6 @@
 #include "x87emu_private.h"

 #include "box64context.h"

 #include "bridge.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64rundf.c b/src/emu/x64rundf.c
index 40186287..613c90f8 100644
--- a/src/emu/x64rundf.c
+++ b/src/emu/x64rundf.c
@@ -18,12 +18,7 @@
 #include "x64trace.h"

 #include "x87emu_private.h"

 #include "box64context.h"

-//#include "my_cpuid.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c
index 3b027137..faf1a109 100644
--- a/src/emu/x64runf0.c
+++ b/src/emu/x64runf0.c
@@ -20,9 +20,8 @@
 #include "box64context.h"

 #include "my_cpuid.h"

 #include "bridge.h"

-//#include "signals.h"

 #ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

+#include "../dynarec/arm64_lock_helper.h"

 #endif

 

 #include "modrm.h"

@@ -31,9 +30,11 @@ int RunF0(x64emu_t *emu, rex_t rex)
 {

     uint8_t opcode;

     uint8_t nextop;

-    uint32_t tmp32u;

+    uint8_t tmp8u;

+    int32_t tmp32s;

+    uint32_t tmp32u, tmp32u2;

     int64_t tmp64s;

-    uint64_t tmp64u;

+    uint64_t tmp64u, tmp64u2;

     reg64_t *oped, *opgd;

 

     opcode = F8;

@@ -46,42 +47,57 @@ int RunF0(x64emu_t *emu, rex_t rex)
 

     switch(opcode) {

 #ifdef DYNAREC

-        #define GO(B, OP)                      \

-        case B+0: \

-            nextop = F8;               \

-            GETEB(0);             \

+        #define GO(B, OP)                                           \

+        case B+0:                                                   \

+            nextop = F8;                                            \

+            GETEB(0);                                               \

             GETGB;                                                  \

-            do {                \

-            tmp8u = arm_lock_read_b(EB);     \

-            tmp8u = OP##8(emu, tmp8u, GB);  \

-            } while (arm_lock_write_b(EB, tmp8u));   \

-            break;                              \

-        case B+1: \

-            nextop = F8;               \

-            GETED(0);             \

+            do {                                                    \

+                tmp8u = arm64_lock_read_b(EB);                      \

+                tmp8u = OP##8(emu, tmp8u, GB);                      \

+            } while (arm64_lock_write_b(EB, tmp8u));                \

+            break;                                                  \

+        case B+1:                                                   \

+            nextop = F8;                                            \

+            GETED(0);                                               \

             GETGD;                                                  \

-            do {                \

-            tmp32u = arm_lock_read_d(ED);     \

-            tmp32u = OP##32(emu, tmp32u, GD.dword[0]);  \

-            } while (arm_lock_write_d(ED, tmp32u));   \

-            break;                              \

-        case B+2: \

-            nextop = F8;               \

-            GETEB(0);                   \

+            if(rex.w) {                                             \

+                do {                                                \

+                    tmp64u = arm64_lock_read_dd(ED);                \

+                    tmp64u = OP##64(emu, tmp64u, GD->q[0]);         \

+                } while (arm64_lock_write_dd(ED, tmp64u));          \

+            } else {                                                \

+                do {                                                \

+                    tmp32u = arm64_lock_read_d(ED);                 \

+                    tmp32u = OP##32(emu, tmp32u, GD->dword[0]);     \

+                } while (arm64_lock_write_d(ED, tmp32u));           \

+                if(MODREG)                                          \

+                    ED->dword[1] = 0;                               \

+            }                                                       \

+            break;                                                  \

+        case B+2:                                                   \

+            nextop = F8;                                            \

+            GETEB(0);                                               \

             GETGB;                                                  \

-            GB = OP##8(emu, GB, EB->byte[0]); \

-            break;                              \

-        case B+3: \

-            nextop = F8;               \

-            GETED(0);         \

+            GB = OP##8(emu, GB, EB->byte[0]);                       \

+            break;                                                  \

+        case B+3:                                                   \

+            nextop = F8;                                            \

+            GETED(0);                                               \

             GETGD;                                                  \

-            GD.dword[0] = OP##32(emu, GD.dword[0], ED->dword[0]); \

-            break;                              \

-        case B+4: \

-            R_AL = OP##8(emu, R_AL, F8); \

-            break;                              \

-        case B+5: \

-            R_EAX = OP##32(emu, R_EAX, F32); \

+            if(rex.w)                                               \

+                GD->q[0] = OP##64(emu, GD->q[0], ED->q[0]);         \

+            else                                                    \

+                GD->q[0] = OP##32(emu, GD->dword[0], ED->dword[0]); \

+            break;                                                  \

+        case B+4:                                                   \

+            R_AL = OP##8(emu, R_AL, F8);                            \

+            break;                                                  \

+        case B+5:                                                   \

+            if(rex.w)                                               \

+                R_RAX = OP##64(emu, R_RAX, F32S64);                 \

+            else                                                    \

+                R_RAX = OP##32(emu, R_EAX, F32);                    \

             break;

 #else

         #define GO(B, OP)                                           \

@@ -157,32 +173,28 @@ int RunF0(x64emu_t *emu, rex_t rex)
                     GETED(0);

                     GETGD;

 #ifdef DYNAREC

-                    if(((uintptr_t)ED)&3) {

+                    if(rex.w)

                         do {

-                            tmp32u = ED->dword[0] & ~0xff;

-                            tmp32u |= arm_lock_read_b(ED);

-                            cmp32(emu, R_EAX, tmp32u);

+                            tmp64u = arm64_lock_read_dd(ED);

+                            cmp64(emu, R_RAX, tmp64u);

                             if(ACCESS_FLAG(F_ZF)) {

-                                tmp32s = arm_lock_write_b(ED, GD->dword[0] & 0xff);

-                                if(!tmp32s)

-                                    ED->dword[0] = GD.dword[0];

+                                tmp32s = arm64_lock_write_dd(ED, GD->q[0]);

                             } else {

-                                R_EAX = tmp32u;

+                                R_RAX = tmp64u;

                                 tmp32s = 0;

                             }

                         } while(tmp32s);

-                    } else {

+                    else

                         do {

-                            tmp32u = arm_lock_read_d(ED);

+                            tmp32u = arm64_lock_read_d(ED);

                             cmp32(emu, R_EAX, tmp32u);

                             if(ACCESS_FLAG(F_ZF)) {

-                                tmp32s = arm_lock_write_d(ED, GD.dword[0]);

+                                tmp32s = arm64_lock_write_d(ED, GD->dword[0]);

                             } else {

-                                R_EAX = tmp32u;

+                                R_RAX = tmp32u;

                                 tmp32s = 0;

                             }

                         } while(tmp32s);

-                    }

 #else

                     pthread_mutex_lock(&emu->context->mutex_lock);

                     if(rex.w) {

@@ -209,20 +221,21 @@ int RunF0(x64emu_t *emu, rex_t rex)
                     GETED(0);

                     GETGD;

 #ifdef DYNAREC

-                    if(((uintptr_t)ED)&3) {

+                    if(rex.w) {

                         do {

-                            tmp32u = ED->dword[0] & ~0xff;

-                            tmp32u |= arm_lock_read_b(ED);

-                            tmp32u2 = add32(emu, tmp32u, GD.dword[0]);

-                        } while(arm_lock_write_b(ED, tmp32u2&0xff));

-                        ED->dword[0] = tmp32u2;

+                            tmp64u = arm64_lock_read_dd(ED);

+                            tmp64u2 = add64(emu, tmp64u, GD->q[0]);    // REX.W: add the full 64-bit Gd, not only its low dword

+                        } while(arm64_lock_write_dd(ED, tmp64u2));

+                        GD->q[0] = tmp64u;  // Gd receives the value read from Ed before the add

                     } else {

                         do {

-                            tmp32u = arm_lock_read_d(ED);

-                            tmp32u2 = add32(emu, tmp32u, GD.dword[0]);

-                        } while(arm_lock_write_d(ED, tmp32u2));

+                            tmp32u = arm64_lock_read_d(ED);

+                            tmp32u2 = add32(emu, tmp32u, GD->dword[0]);

+                        } while(arm64_lock_write_d(ED, tmp32u2));

+                        GD->q[0] = tmp32u;  // Gd receives the old Ed value, zero-extended to 64 bits

+                        if(MODREG)

+                            ED->dword[1] = 0;   // register destination: clear the upper dword after the 32-bit write

                     }

-                    GD.dword[0] = tmp32u;

 #else

                     pthread_mutex_lock(&emu->context->mutex_lock);

                     if(rex.w) {

@@ -232,7 +245,7 @@ int RunF0(x64emu_t *emu, rex_t rex)
                     } else {

                         tmp32u = add32(emu, ED->dword[0], GD->dword[0]);

                         GD->q[0] = ED->dword[0];

-                        if((nextop&0xC0)==0xC0)

+                        if(MODREG)

                             ED->q[0] = tmp32u;

                         else

                             ED->dword[0] = tmp32u;

@@ -256,15 +269,40 @@ int RunF0(x64emu_t *emu, rex_t rex)
             } else

                 tmp64u = F32S64;

 #ifdef DYNAREC

-            switch((nextop>>3)&7) {

-                case 0: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, add32(emu, tmp32u2, tmp32u))); break;

-                case 1: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED,  or32(emu, tmp32u2, tmp32u))); break;

-                case 2: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, adc32(emu, tmp32u2, tmp32u))); break;

-                case 3: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, sbb32(emu, tmp32u2, tmp32u))); break;

-                case 4: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, and32(emu, tmp32u2, tmp32u))); break;

-                case 5: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, sub32(emu, tmp32u2, tmp32u))); break;

-                case 6: do { tmp32u2 = arm_lock_read_d(ED);} while(arm_lock_write_d(ED, xor32(emu, tmp32u2, tmp32u))); break;

-                case 7:                cmp32(emu, ED->dword[0], tmp32u); break;

+            if(rex.w) {

+                switch((nextop>>3)&7) {

+                    case 0: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = add64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;

+                    case 1: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 =  or64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;

+                    case 2: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = adc64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;

+                    case 3: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = sbb64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;

+                    case 4: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = and64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;

+                    case 5: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = sub64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;

+                    case 6: do { tmp64u2 = arm64_lock_read_dd(ED); tmp64u2 = xor64(emu, tmp64u2, tmp64u);} while(arm64_lock_write_dd(ED, tmp64u2)); break;

+                    case 7:                cmp64(emu, ED->q[0], tmp64u); break;

+                }

+            } else {

+                if(MODREG)

+                    switch((nextop>>3)&7) {

+                        case 0: ED->q[0] = add32(emu, ED->dword[0], tmp64u); break;

+                        case 1: ED->q[0] =  or32(emu, ED->dword[0], tmp64u); break;

+                        case 2: ED->q[0] = adc32(emu, ED->dword[0], tmp64u); break;

+                        case 3: ED->q[0] = sbb32(emu, ED->dword[0], tmp64u); break;

+                        case 4: ED->q[0] = and32(emu, ED->dword[0], tmp64u); break;

+                        case 5: ED->q[0] = sub32(emu, ED->dword[0], tmp64u); break;

+                        case 6: ED->q[0] = xor32(emu, ED->dword[0], tmp64u); break;

+                        case 7:            cmp32(emu, ED->dword[0], tmp64u); break;

+                    }

+                else

+                    switch((nextop>>3)&7) {    // memory operand: each read-modify-write retries until the locked write succeeds

+                        case 0: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = add32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;

+                        case 1: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 =  or32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;

+                        case 2: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = adc32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;

+                        case 3: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = sbb32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;

+                        case 4: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = and32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;

+                        case 5: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = sub32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;

+                        case 6: do { tmp32u2 = arm64_lock_read_d(ED); tmp32u2 = xor32(emu, tmp32u2, tmp64u);} while(arm64_lock_write_d(ED, tmp32u2)); break;

+                        case 7:                cmp32(emu, ED->dword[0], tmp64u); break;  // CMP only reads Ed; the immediate is held in tmp64u like the other cases

+                    }

             }

 #else

             pthread_mutex_lock(&emu->context->mutex_lock);

diff --git a/src/emu/x64runf20f.c b/src/emu/x64runf20f.c
index 7b8238a8..18ec9645 100644
--- a/src/emu/x64runf20f.c
+++ b/src/emu/x64runf20f.c
@@ -19,10 +19,6 @@
 #include "x87emu_private.h"

 #include "box64context.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/emu/x64runf30f.c b/src/emu/x64runf30f.c
index 9b25e73a..f18aa951 100644
--- a/src/emu/x64runf30f.c
+++ b/src/emu/x64runf30f.c
@@ -18,12 +18,7 @@
 #include "x64trace.h"

 #include "x87emu_private.h"

 #include "box64context.h"

-//#include "my_cpuid.h"

 #include "bridge.h"

-//#include "signals.h"

-#ifdef DYNAREC

-#include "../dynarec/arm_lock_helper.h"

-#endif

 

 #include "modrm.h"

 

diff --git a/src/include/box64context.h b/src/include/box64context.h
index 6e4d9052..c52ac53b 100755
--- a/src/include/box64context.h
+++ b/src/include/box64context.h
@@ -27,6 +27,8 @@ typedef struct dynablocklist_s  dynablocklist_t;
 typedef struct mmaplist_s       mmaplist_t;
 typedef struct kh_dynablocks_s  kh_dynablocks_t;
 #endif
+#define DYNAMAP_SHIFT 16
+#define JMPTABL_SHIFT 16
 
 typedef void* (*procaddess_t)(const char* name);
 typedef void* (*vkprocaddess_t)(void* instance, const char* name);
diff --git a/src/include/custommem.h b/src/include/custommem.h
index a018fd37..d5e8a09a 100644
--- a/src/include/custommem.h
+++ b/src/include/custommem.h
@@ -27,10 +27,10 @@ void addDBFromAddressRange(uintptr_t addr, uintptr_t size);
 void cleanDBFromAddressRange(uintptr_t addr, uintptr_t size, int destroy);
 
 dynablocklist_t* getDB(uintptr_t idx);
-void addJumpTableIfDefault(void* addr, void* jmp);
-void setJumpTableDefault(void* addr);
-uintptr_t getJumpTable();
-uintptr_t getJumpTableAddress(uintptr_t addr);
+void addJumpTableIfDefault64(void* addr, void* jmp);
+void setJumpTableDefault64(void* addr);
+uintptr_t getJumpTable64();
+uintptr_t getJumpTableAddress64(uintptr_t addr);
 #endif
 
 #define PROT_DYNAREC 0x80
diff --git a/src/include/debug.h b/src/include/debug.h
index 873db669..61071b48 100755
--- a/src/include/debug.h
+++ b/src/include/debug.h
@@ -7,6 +7,13 @@ extern int box64_log;    // log level
 extern int box64_dynarec_log;
 extern int box64_dynarec;
 extern int box64_pagesize;
+#ifdef DYNAREC
+extern int box64_dynarec_dump;
+extern int box64_dynarec_trace;
+extern int box64_dynarec_forced;
+extern int box64_dynarec_largest;
+extern uintptr_t box64_nodynarec_start, box64_nodynarec_end;
+#endif
 extern int dlsym_error;  // log dlsym error
 extern int trace_xmm;    // include XMM reg in trace?
 extern int trace_emm;    // include EMM reg in trace?
diff --git a/src/include/dynablock.h b/src/include/dynablock.h
new file mode 100755
index 00000000..76f1e5e0
--- /dev/null
+++ b/src/include/dynablock.h
@@ -0,0 +1,31 @@
+#ifndef __DYNABLOCK_H_
+#define __DYNABLOCK_H_
+
+typedef struct x64emu_s x64emu_t;
+typedef struct dynablock_s dynablock_t;
+typedef struct dynablocklist_s dynablocklist_t;
+typedef struct kh_dynablocks_s  kh_dynablocks_t;
+
+uint32_t X31_hash_code(void* addr, int len);
+dynablocklist_t* NewDynablockList(uintptr_t text, int textsz, int direct);
+void FreeDynablockList(dynablocklist_t** dynablocks);
+void FreeDynablock(dynablock_t* db);
+void MarkDynablock(dynablock_t* db);
+void FreeRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size);
+void MarkRangeDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size);
+
+dynablock_t* FindDynablockFromNativeAddress(void* addr);    // defined in box64context.h
+dynablock_t* FindDynablockDynablocklist(void* addr, kh_dynablocks_t* dynablocks);
+
+uintptr_t StartDynablockList(dynablocklist_t* db);
+uintptr_t EndDynablockList(dynablocklist_t* db);
+void MarkDirectDynablock(dynablocklist_t* dynablocks, uintptr_t addr, uintptr_t size);
+
+// Handling of Dynarec block (i.e. an executable chunk of x64 translated code)
+dynablock_t* DBGetBlock(x64emu_t* emu, uintptr_t addr, int create, dynablock_t** current);   // return NULL if block is not found / cannot be created. Don't create if create==0
+dynablock_t* DBAlternateBlock(x64emu_t* emu, uintptr_t addr, uintptr_t filladdr);
+
+// Create and add a new dynablock in the list, handling direct/map
+dynablock_t *AddNewDynablock(dynablocklist_t* dynablocks, uintptr_t addr, int* created);
+
+#endif //__DYNABLOCK_H_
\ No newline at end of file
diff --git a/src/include/dynarec_arm64.h b/src/include/dynarec_arm64.h
new file mode 100755
index 00000000..1225153d
--- /dev/null
+++ b/src/include/dynarec_arm64.h
@@ -0,0 +1,9 @@
+#ifndef __DYNAREC_ARM_H_
+#define __DYNAREC_ARM_H_
+
+typedef struct dynablock_s dynablock_t;
+typedef struct x64emu_s x64emu_t;
+
+void* FillBlock64(dynablock_t* block, uintptr_t addr);
+
+#endif //__DYNAREC_ARM_H_
\ No newline at end of file
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index 4ceac2d7..e3d3002a 100755
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -446,12 +446,12 @@ void my_sigactionhandler_oldcode(int32_t sig, siginfo_t* info, void * ucntx, int
     // get that actual ESP first!
     x64emu_t *emu = thread_get_emu();
     uintptr_t *frame = (uintptr_t*)R_RSP;
-#if defined(DYNAREC) && defined(__arm__)
+#if defined(DYNAREC) && defined(__aarch64__)
     ucontext_t *p = (ucontext_t *)ucntx;
-    void * pc = (void*)p->uc_mcontext.arm_pc;
+    void * pc = (void*)p->uc_mcontext.pc;
     dynablock_t* db = (dynablock_t*)cur_db;//FindDynablockFromNativeAddress(pc);
     if(db) {
-        frame = (uint32_t*)p->uc_mcontext.arm_r8;
+        frame = (uintptr_t*)p->uc_mcontext.regs[10+_SP];
     }
 #endif
     // stack tracking
@@ -495,17 +495,25 @@ void my_sigactionhandler_oldcode(int32_t sig, siginfo_t* info, void * ucntx, int
     sigcontext->uc_mcontext.gregs[X64_EFL] = emu->eflags.x32;
     // get segments
     sigcontext->uc_mcontext.gregs[X64_CSGSFS] = ((uint64_t)(R_CS)) | (((uint64_t)(R_GS))<<16) | (((uint64_t)(R_FS))<<32);
-#if defined(DYNAREC) && defined(__arm__)
+#if defined(DYNAREC) && defined(__aarch64__)
     if(db) {
-        sigcontext->uc_mcontext.gregs[X64_RAX] = p->uc_mcontext.arm_r4;
-        sigcontext->uc_mcontext.gregs[X64_RCX] = p->uc_mcontext.arm_r5;
-        sigcontext->uc_mcontext.gregs[X64_RDX] = p->uc_mcontext.arm_r6;
-        sigcontext->uc_mcontext.gregs[X64_RBX] = p->uc_mcontext.arm_r7;
-        sigcontext->uc_mcontext.gregs[X64_RSP] = p->uc_mcontext.arm_r8;
-        sigcontext->uc_mcontext.gregs[X64_RBP] = p->uc_mcontext.arm_r9;
-        sigcontext->uc_mcontext.gregs[X64_RSI] = p->uc_mcontext.arm_r10;
-        sigcontext->uc_mcontext.gregs[X64_RDI] = p->uc_mcontext.arm_fp;
-        sigcontext->uc_mcontext.gregs[X64_RIP] = getX86Address(db, (uintptr_t)pc);
+        sigcontext->uc_mcontext.gregs[X64_RAX] = p->uc_mcontext.regs[10];
+        sigcontext->uc_mcontext.gregs[X64_RCX] = p->uc_mcontext.regs[11];
+        sigcontext->uc_mcontext.gregs[X64_RDX] = p->uc_mcontext.regs[12];
+        sigcontext->uc_mcontext.gregs[X64_RBX] = p->uc_mcontext.regs[13];
+        sigcontext->uc_mcontext.gregs[X64_RSP] = p->uc_mcontext.regs[14];
+        sigcontext->uc_mcontext.gregs[X64_RBP] = p->uc_mcontext.regs[15];
+        sigcontext->uc_mcontext.gregs[X64_RSI] = p->uc_mcontext.regs[16];
+        sigcontext->uc_mcontext.gregs[X64_RDI] = p->uc_mcontext.regs[17];
+        sigcontext->uc_mcontext.gregs[X64_R8] = p->uc_mcontext.regs[18];
+        sigcontext->uc_mcontext.gregs[X64_R9] = p->uc_mcontext.regs[19];
+        sigcontext->uc_mcontext.gregs[X64_R10] = p->uc_mcontext.regs[20];
+        sigcontext->uc_mcontext.gregs[X64_R11] = p->uc_mcontext.regs[21];
+        sigcontext->uc_mcontext.gregs[X64_R12] = p->uc_mcontext.regs[22];
+        sigcontext->uc_mcontext.gregs[X64_R13] = p->uc_mcontext.regs[23];
+        sigcontext->uc_mcontext.gregs[X64_R14] = p->uc_mcontext.regs[24];
+        sigcontext->uc_mcontext.gregs[X64_R15] = p->uc_mcontext.regs[25];
+        sigcontext->uc_mcontext.gregs[X64_RIP] = getX64Address(db, (uintptr_t)pc);
     }
 #endif
     // get FloatPoint status
@@ -637,7 +645,7 @@ void my_sigactionhandler_oldcode(int32_t sig, siginfo_t* info, void * ucntx, int
         new_ss->ss_flags = 0;
 }
 
-void my_box86signalhandler(int32_t sig, siginfo_t* info, void * ucntx)
+void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx)
 {
     // sig==SIGSEGV || sig==SIGBUS || sig==SIGILL here!
     int log_minimum = (my_context->is_sigaction[sig] && sig==SIGSEGV)?LOG_INFO:LOG_NONE;
@@ -659,32 +667,47 @@ void my_box86signalhandler(int32_t sig, siginfo_t* info, void * ucntx)
     dynablock_t* db = NULL;
     int db_searched = 0;
     if ((sig==SIGSEGV) && (addr) && (info->si_code == SEGV_ACCERR) && (prot&PROT_DYNAREC)) {
-        if(box86_dynarec_smc) {
-            dynablock_t* db_pc = NULL;
-            db_pc = FindDynablockFromNativeAddress(pc);
-            if(db_pc) {
-                db = FindDynablockFromNativeAddress(addr);
-                db_searched = 1;
-            }
-            if(db_pc && db) {
-                if (db_pc == db) {
-                    dynarec_log(LOG_NONE, "Warning: Access to protected %p from %p, inside same dynablock\n", addr, pc);            
-                }
-            }
-            if(db && db->x86_addr>= addr && (db->x86_addr+db->x86_size)<addr) {
-                dynarec_log(LOG_INFO, "Warning, addr inside current dynablock!\n");
-            }
-        }
-        dynarec_log(LOG_DEBUG, "Access to protected %p from %p, unprotecting memory (prot=%x)\n", addr, pc, prot);
         // access error, unprotect the block (and mark them dirty)
         if(prot&PROT_DYNAREC)   // on heavy multi-thread program, the protection can already be gone...
             unprotectDB((uintptr_t)addr, 1);    // unprotect 1 byte... But then, the whole page will be unprotected
+        // check if SMC inside block
+        if(!db_searched) {
+            db = FindDynablockFromNativeAddress(pc);
+            db_searched = 1;
+        }
+        if(db && (addr>=db->x64_addr && addr<(db->x64_addr+db->x64_size))) {
+            // dynablock got auto-dirty! need to get out of it!!!
+            emu_jmpbuf_t* ejb = GetJmpBuf();
+            if(ejb->jmpbuf_ok) {
+                ejb->emu->regs[_AX].q[0] = p->uc_mcontext.regs[10];
+                ejb->emu->regs[_CX].q[0] = p->uc_mcontext.regs[11];
+                ejb->emu->regs[_DX].q[0] = p->uc_mcontext.regs[12];
+                ejb->emu->regs[_BX].q[0] = p->uc_mcontext.regs[13];
+                ejb->emu->regs[_SP].q[0] = p->uc_mcontext.regs[14];
+                ejb->emu->regs[_BP].q[0] = p->uc_mcontext.regs[15];
+                ejb->emu->regs[_SI].q[0] = p->uc_mcontext.regs[16];
+                ejb->emu->regs[_DI].q[0] = p->uc_mcontext.regs[17];
+                ejb->emu->regs[_R8].q[0] = p->uc_mcontext.regs[18];
+                ejb->emu->regs[_R9].q[0] = p->uc_mcontext.regs[19];
+                ejb->emu->regs[_R10].q[0] = p->uc_mcontext.regs[20];
+                ejb->emu->regs[_R11].q[0] = p->uc_mcontext.regs[21];
+                ejb->emu->regs[_R12].q[0] = p->uc_mcontext.regs[22];
+                ejb->emu->regs[_R13].q[0] = p->uc_mcontext.regs[23];
+                ejb->emu->regs[_R14].q[0] = p->uc_mcontext.regs[24];
+                ejb->emu->regs[_R15].q[0] = p->uc_mcontext.regs[25];
+                ejb->emu->ip.q[0] = getX64Address(db, (uintptr_t)pc);
+                ejb->emu->eflags.x64 = p->uc_mcontext.regs[26];
+                dynarec_log(LOG_DEBUG, "Auto-SMC detected, getting out of current Dynablock!\n");
+                longjmp(ejb->jmpbuf, 2);
+            }
+            dynarec_log(LOG_INFO, "Warning, Auto-SMC (%p for db %p/%p) detected, but jmpbuffer not ready!\n", (void*)addr, db, (void*)db->x64_addr);
+        }
         // done
         if(prot&PROT_WRITE) return; // if there is no write permission, don't return and continue to program signal handling
     } else if ((sig==SIGSEGV) && (addr) && (info->si_code == SEGV_ACCERR) && (prot&(PROT_READ|PROT_WRITE))) {
         db = FindDynablockFromNativeAddress(pc);
         db_searched = 1;
-        if(db && db->x86_addr>= addr && (db->x86_addr+db->x86_size)<addr) {
+        if(db && addr>=db->x64_addr && addr<(db->x64_addr+db->x64_size)) {   // addr in [x64_addr, x64_addr+x64_size): the previous test was inverted and could never match
             dynarec_log(LOG_INFO, "Warning, addr inside current dynablock!\n");
         }
         if(addr && pc && db) {
@@ -718,13 +741,13 @@ exit(-1);
         x64emu_t* emu = thread_get_emu();
         x64pc = R_RIP;
         rsp = (void*)R_RSP;
-#if defined(__arm__) && defined(DYNAREC)
-        if(db && p->uc_mcontext.arm_r0>0x10000) {
-            emu = (x64emu_t*)p->uc_mcontext.arm_r0;
+#if defined(__aarch64__) && defined(DYNAREC)
+        if(db && p->uc_mcontext.regs[0]>0x10000) {
+            emu = (x64emu_t*)p->uc_mcontext.regs[0];
         }
         if(db) {
             x64pc = getX64Address(db, (uintptr_t)pc);
-            rsp = (void*)p->uc_mcontext.arm_r8;
+            rsp = (void*)p->uc_mcontext.regs[10+_SP];
         }
 #endif
         x64name = getAddrFunctionName(x64pc);
@@ -755,12 +778,12 @@ exit(-1);
 #ifdef DYNAREC
         uint32_t hash = 0;
         if(db)
-            hash = X31_hash_code(db->x86_addr, db->x86_size);
+            hash = X31_hash_code(db->x64_addr, db->x64_size);
         printf_log(log_minimum, "%04d|%s @%p (%s) (x64pc=%p/%s:\"%s\", rsp=%p), for accessing %p (code=%d/prot=%x), db=%p(%p:%p/%p:%p/%s:%s, hash:%x/%x)", 
             GetTID(), signame, pc, name, (void*)x64pc, elfname?elfname:"???", x64name?x64name:"???", rsp, addr, info->si_code, 
             prot, db, db?db->block:0, db?(db->block+db->size):0, 
-            db?db->x86_addr:0, db?(db->x86_addr+db->x86_size):0, 
-            getAddrFunctionName((uintptr_t)(db?db->x86_addr:0)), (db?db->need_test:0)?"need_stest":"clean", db?db->hash:0, hash);
+            db?db->x64_addr:0, db?(db->x64_addr+db->x64_size):0, 
+            getAddrFunctionName((uintptr_t)(db?db->x64_addr:0)), (db?db->need_test:0)?"need_stest":"clean", db?db->hash:0, hash);
 #else
         printf_log(log_minimum, "%04d|%s @%p (%s) (x64pc=%p/%s:\"%s\", rsp=%p), for accessing %p (code=%d)", GetTID(), signame, pc, name, (void*)x64pc, elfname?elfname:"???", x64name?x64name:"???", rsp, addr, info->si_code);
 #endif
@@ -788,7 +811,7 @@ void my_sigactionhandler(int32_t sig, siginfo_t* info, void * ucntx)
 {
     #ifdef DYNAREC
     ucontext_t *p = (ucontext_t *)ucntx;
-    void * pc = (void*)p->uc_mcontext.arm_pc;
+    void * pc = (void*)p->uc_mcontext.pc;
     dynablock_t* db = FindDynablockFromNativeAddress(pc);
     #else
     void* db = NULL;
@@ -1135,13 +1158,13 @@ void init_signal_helper(box64context_t* context)
     }
 	struct sigaction action = {0};
 	action.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
-	action.sa_sigaction = my_box86signalhandler;
+	action.sa_sigaction = my_box64signalhandler;
     sigaction(SIGSEGV, &action, NULL);
 	action.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
-	action.sa_sigaction = my_box86signalhandler;
+	action.sa_sigaction = my_box64signalhandler;
     sigaction(SIGBUS, &action, NULL);
 	action.sa_flags = SA_SIGINFO | SA_RESTART | SA_NODEFER;
-	action.sa_sigaction = my_box86signalhandler;
+	action.sa_sigaction = my_box64signalhandler;
     sigaction(SIGILL, &action, NULL);
 
 	pthread_once(&sigstack_key_once, sigstack_key_alloc);
diff --git a/src/libtools/threads.c b/src/libtools/threads.c
index 3e1a8eaa..386960db 100755
--- a/src/libtools/threads.c
+++ b/src/libtools/threads.c
@@ -299,7 +299,7 @@ EXPORT int my_pthread_create(x64emu_t *emu, void* t, void* attr, void* start_rou
 	et->fnc = (uintptr_t)start_routine;
 	et->arg = arg;
 	#ifdef DYNAREC
-	if(box86_dynarec) {
+	if(box64_dynarec) {
 		// pre-creation of the JIT code for the entry point of the thread
 		dynablock_t *current = NULL;
 		DBGetBlock(emu, (uintptr_t)start_routine, 1, &current);
diff --git a/src/main.c b/src/main.c
index 095b61ad..0811136c 100755
--- a/src/main.c
+++ b/src/main.c
@@ -29,7 +29,16 @@ int box64_log = LOG_INFO; //LOG_NONE;
 int box64_nobanner = 0;
 int box64_dynarec_log = LOG_NONE;
 int box64_pagesize;
+#ifdef DYNAREC
+int box64_dynarec = 1;
+int box64_dynarec_dump = 0;
+int box64_dynarec_forced = 0;
+int box64_dynarec_largest = 0;
+uintptr_t box64_nodynarec_start = 0;
+uintptr_t box64_nodynarec_end = 0;
+#else   //DYNAREC
 int box64_dynarec = 0;
+#endif
 int dlsym_error = 0;
 int trace_xmm = 0;
 int trace_emm = 0;
@@ -128,6 +137,61 @@ void LoadLogEnv()
         if(!box64_nobanner)
             printf_log(LOG_INFO, "Debug level is %d\n", box64_log);
     }
+#ifdef DYNAREC
+    p = getenv("BOX64_DYNAREC_DUMP");   // 0: no dump, non-zero: dump blocks, >1: dump in color
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[0]<='2')  // bound p[0] on both sides: p[1] is the NUL terminator when strlen(p)==1
+                box64_dynarec_dump = p[0]-'0';
+        }
+        if (box64_dynarec_dump) printf_log(LOG_INFO, "Dynarec blocks are dumped%s\n", (box64_dynarec_dump>1)?" in color":"");
+    }
+    p = getenv("BOX64_DYNAREC_LOG");    // single digit (LOG_NONE..LOG_DUMP) or a level name, case-insensitive
+    if(p) {
+        if(strlen(p)==1) {
+            if((p[0]>='0'+LOG_NONE) && (p[0]<='0'+LOG_DUMP))
+                box64_dynarec_log = p[0]-'0';
+        } else {
+            if(!strcasecmp(p, "NONE"))
+                box64_dynarec_log = LOG_NONE;
+            else if(!strcasecmp(p, "INFO"))
+                box64_dynarec_log = LOG_INFO;
+            else if(!strcasecmp(p, "DEBUG"))
+                box64_dynarec_log = LOG_DEBUG;
+            else if(!strcasecmp(p, "VERBOSE") || !strcasecmp(p, "DUMP"))    // "DUMP" is the name advertised by PrintHelp; "VERBOSE" kept for compatibility
+                box64_dynarec_log = LOG_DUMP;
+        }
+        printf_log(LOG_INFO, "Dynarec log level is %d\n", box64_dynarec_log);
+    }
+    p = getenv("BOX64_DYNAREC");    // "0" disables the dynarec, "1" enables it; anything else keeps the current value
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[0]<='1')  // bound p[0] on both sides: p[1] is the NUL terminator when strlen(p)==1
+                box64_dynarec = p[0]-'0';
+        }
+        printf_log(LOG_INFO, "Dynarec is %s\n", box64_dynarec?"On":"Off");
+    }
+    p = getenv("BOX64_DYNAREC_FORCED"); // "0" or "1"; anything else keeps the current value
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[0]<='1')  // bound p[0] on both sides: p[1] is the NUL terminator when strlen(p)==1
+                box64_dynarec_forced = p[0]-'0';
+        }
+        if(box64_dynarec_forced)
+            printf_log(LOG_INFO, "Dynarec is Forced on all addresses\n");
+    }
+    p = getenv("BOX64_NODYNAREC");
+    if(p) {
+        if (strchr(p,'-')) {
+            if(sscanf(p, "%ld-%ld", &box64_nodynarec_start, &box64_nodynarec_end)!=2) {
+                if(sscanf(p, "0x%lX-0x%lX", &box64_nodynarec_start, &box64_nodynarec_end)!=2)
+                    sscanf(p, "%lx-%lx", &box64_nodynarec_start, &box64_nodynarec_end);
+            }
+            printf_log(LOG_INFO, "No Dynablock creation that start in %p - %p range\n", (void*)box64_nodynarec_start, (void*)box64_nodynarec_end);
+        }
+    }
+
+#endif
 #ifdef HAVE_TRACE
     p = getenv("BOX64_TRACE_XMM");
     if(p) {
@@ -149,6 +213,17 @@ void LoadLogEnv()
         start_cnt = strtoll(p, &p2, 10);
         printf_log(LOG_INFO, "Will start trace only after %lu instructions\n", start_cnt);
     }
+#ifdef DYNAREC
+    p = getenv("BOX64_DYNAREC_TRACE");  // "0" or "1"; anything else keeps the current value
+    if(p) {
+        if(strlen(p)==1) {
+            if(p[0]>='0' && p[0]<='1')  // bound p[0] on both sides: p[1] is the NUL terminator when strlen(p)==1
+                box64_dynarec_trace = p[0]-'0';
+            if(box64_dynarec_trace)
+                printf_log(LOG_INFO, "Dynarec generated code will also print a trace\n");
+        }
+    }
+#endif
 #endif
     // grab BOX64_TRACE_FILE envvar, and change %pid to actual pid is present in the name
     openFTrace();
@@ -345,6 +420,11 @@ void PrintHelp() {
     printf(" BOX64_LD_LIBRARY_PATH is the box64 version LD_LIBRARY_PATH (default is '.:lib')\n");
     printf(" BOX64_LOG with 0/1/2/3 or NONE/INFO/DEBUG/DUMP to set the printed debug info\n");
     printf(" BOX64_NOBANNER with 0/1 to enable/disable the printing of box64 version and build at start\n");
+#ifdef DYNAREC
+    printf(" BOX64_DYNAREC_LOG with 0/1/2/3 or NONE/INFO/DEBUG/DUMP to set the printed dynarec info\n");
+    printf(" BOX64_DYNAREC with 0/1 to disable or enable Dynarec (On by default)\n");
+    printf(" BOX64_NODYNAREC with address interval (0x1234-0x4567) to forbid dynablock creation in the interval specified\n");
+#endif
 #ifdef HAVE_TRACE
     printf(" BOX64_TRACE with 1 to enable x86_64 execution trace\n");
     printf("    or with XXXXXX-YYYYYY to enable x86_64 execution trace only between address\n");
@@ -353,6 +433,9 @@ void PrintHelp() {
     printf(" BOX64_TRACE_EMM with 1 to enable dump of MMX registers along with regular registers\n");
     printf(" BOX64_TRACE_XMM with 1 to enable dump of SSE registers along with regular registers\n");
     printf(" BOX64_TRACE_START with N to enable trace after N instructions\n");
+#ifdef DYNAREC
+    printf(" BOX64_DYNAREC_TRACE with 0/1 to disable or enable Trace on generated code too\n");
+#endif
 #endif
     printf(" BOX64_TRACE_FILE with FileName to redirect logs in a file");
     printf(" BOX64_DLSYM_ERROR with 1 to log dlsym errors\n");
@@ -765,7 +848,7 @@ int main(int argc, const char **argv, const char **env) {
     if(ElfCheckIfUseTCMallocMinimal(elf_header)) {
         if(!box64_tcmalloc_minimal) {
             // need to reload with tcmalloc_minimal as a LD_PRELOAD!
-            printf_log(LOG_INFO, "BOX86: tcmalloc_minimal.so.4 used, reloading box64 with the lib preladed\n");
+            printf_log(LOG_INFO, "BOX64: tcmalloc_minimal.so.4 used, reloading box64 with the lib preladed\n");
             // need to get a new envv variable. so first count it and check if LD_PRELOAD is there
             int preload=(getenv("LD_PRELOAD"))?1:0;
             int nenv = 0;
@@ -805,7 +888,7 @@ int main(int argc, const char **argv, const char **env) {
             if(execve(newargv[0], newargv, newenv)<0)
                 printf_log(LOG_NONE, "Failed to relaunch, error is %d/%s\n", errno, strerror(errno));
         } else {
-            printf_log(LOG_INFO, "BOX86: Using tcmalloc_minimal.so.4, and it's in the LD_PRELOAD command\n");
+            printf_log(LOG_INFO, "BOX64: Using tcmalloc_minimal.so.4, and it's in the LD_PRELOAD command\n");
         }
     }
     // get and alloc stack size and align
diff --git a/src/tools/bridge.c b/src/tools/bridge.c
index 3724e0b3..a89f32ca 100755
--- a/src/tools/bridge.c
+++ b/src/tools/bridge.c
@@ -5,7 +5,7 @@
 #include <dlfcn.h>
 #include <pthread.h>
 
-//#include "custommem.h"
+#include "custommem.h"
 #include "bridge.h"
 #include "bridge_private.h"
 #include "khash.h"
@@ -146,7 +146,8 @@ uintptr_t AddAutomaticBridge(x64emu_t* emu, bridge_t* bridge, wrapper_t w, void*
         addAlternate(fnc, (void*)ret);
         #ifdef DYNAREC
         // now, check if dynablock at native address exist
-        DBAlternateBlock(emu, (uintptr_t)fnc, ret);
+        if(box64_dynarec)
+            DBAlternateBlock(emu, (uintptr_t)fnc, ret);
         #endif
     }
     return ret;