about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2023-03-12 14:04:57 +0100
committerGitHub <noreply@github.com>2023-03-12 14:04:57 +0100
commit262ec3ed3c9fdf8f5028c55f616565266fc53e4b (patch)
treee4713396644f8dcc524643b5b4e9778ed9d27fea
parent38a5e55745b0e2474dcda1ff4636424af2f37bad (diff)
downloadbox64-262ec3ed3c9fdf8f5028c55f616565266fc53e4b.tar.gz
box64-262ec3ed3c9fdf8f5028c55f616565266fc53e4b.zip
Rv64 dynarec (#550)
* [RV64_DYNAREC] Pushed the rv64_lock.h
* [RV64_DYNAREC] Add initial support for atomic functions
* [RV64_DYNAREC] Added some basic infrastructure for the Dynarec (and 1 opcode)
* [RV64_DYNAREC] Add a disassembler for RV64 instructions
* [RV64_DYNAREC] Added 86 MOV opcode, and some fixes too
* [RV64_DYNAREC] Added 8D LEA opcode
* [RV64_DYNAREC] Added POP reg opcode
* [RV64_DYNAREC] Various fixes and small optims

---------

Co-authored-by: Yang Liu <liuyang22@iscas.ac.cn>
-rwxr-xr-xCMakeLists.txt56
-rwxr-xr-xdocs/COMPILE.md2
-rwxr-xr-xsrc/dynarec/arm64/dynarec_arm64_helper.c62
-rwxr-xr-xsrc/dynarec/arm64/dynarec_arm64_helper.h6
-rwxr-xr-xsrc/dynarec/dynarec_arch.h9
-rwxr-xr-xsrc/dynarec/dynarec_helper.h2
-rwxr-xr-xsrc/dynarec/dynarec_native.c2
-rwxr-xr-xsrc/dynarec/dynarec_native_pass.c55
-rw-r--r--src/dynarec/dynarec_next.h7
-rwxr-xr-xsrc/dynarec/native_lock.h40
-rw-r--r--src/dynarec/rv64/dynarec_rv64_00.c121
-rw-r--r--src/dynarec/rv64/dynarec_rv64_functions.c207
-rw-r--r--src/dynarec/rv64/dynarec_rv64_functions.h26
-rw-r--r--src/dynarec/rv64/dynarec_rv64_helper.c323
-rw-r--r--src/dynarec/rv64/dynarec_rv64_helper.h490
-rw-r--r--src/dynarec/rv64/dynarec_rv64_jmpnext.c13
-rw-r--r--src/dynarec/rv64/dynarec_rv64_pass0.h50
-rw-r--r--src/dynarec/rv64/dynarec_rv64_pass1.h11
-rw-r--r--src/dynarec/rv64/dynarec_rv64_pass2.h19
-rw-r--r--src/dynarec/rv64/dynarec_rv64_pass3.h53
-rw-r--r--src/dynarec/rv64/dynarec_rv64_private.h80
-rw-r--r--src/dynarec/rv64/rv64_emitter.h223
-rw-r--r--src/dynarec/rv64/rv64_epilog.S61
-rw-r--r--src/dynarec/rv64/rv64_lock.S173
-rw-r--r--src/dynarec/rv64/rv64_lock.h60
-rw-r--r--src/dynarec/rv64/rv64_next.S54
-rw-r--r--src/dynarec/rv64/rv64_printer.c1368
-rw-r--r--src/dynarec/rv64/rv64_printer.h8
-rw-r--r--src/dynarec/rv64/rv64_prolog.S53
-rwxr-xr-xsrc/emu/x64run.c8
-rw-r--r--src/emu/x64run66f0.c3
-rw-r--r--src/emu/x64runf0.c3
-rw-r--r--src/include/dynarec_rv64.h10
-rwxr-xr-xsrc/libtools/signals.c75
-rwxr-xr-xsrc/main.c3
35 files changed, 3680 insertions, 56 deletions
diff --git a/CMakeLists.txt b/CMakeLists.txt
index 7e48c601..7b2ccf6d 100755
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -30,23 +30,27 @@ if(LARCH64)
     set(LD80BITS OFF CACHE BOOL "")
     set(NOALIGN OFF CACHE BOOL "")
     set(ARM_DYNAREC OFF CACHE BOOL "")
+    set(RV64_DYNAREC OFF CACHE BOOL "")
 endif()
 if(RV64)
     set(LD80BITS OFF CACHE BOOL "")
     set(NOALIGN OFF CACHE BOOL "")
-    set(ARM_DYNAREC OFF CACHE BOOL "")  
+    set(ARM_DYNAREC OFF CACHE BOOL "")
+    set(RV64_DYNAREC ON CACHE BOOL "")
     set(PAGE16K OFF CACHE BOOL "")
 endif()
 if(PPC64LE)
     set(LD80BITS OFF CACHE BOOL "")
     set(NOALIGN OFF CACHE BOOL "")
     set(ARM_DYNAREC OFF CACHE BOOL "")
+    set(RV64_DYNAREC OFF CACHE BOOL "")
     set(PAGE16K OFF CACHE BOOL "")
 endif()
 if(RK3399 OR RK3588 OR ODROIDN2 OR RPI3ARM64 OR RPI4ARM64 OR RK3326 OR TEGRAX1 OR PHYTIUM OR SD845 OR LX2160A OR M1)
     set(LD80BITS OFF CACHE BOOL "")
     set(NOALIGN OFF CACHE BOOL "")
     set(ARM_DYNAREC ON CACHE BOOL "")
+    set(RV64_DYNAREC OFF CACHE BOOL "")
 endif()
 if(RK3399 OR RK3588 OR ODROIDN2 OR RPI3ARM64 OR RPI4ARM64 OR RK3326 OR TEGRAX1 OR PHYTIUM OR SD845 OR LX2160A)
     set(PAGE16K OFF CACHE BOOL "")
@@ -66,6 +70,7 @@ endif()
 option(LD80BITS "Set to ON if host device have 80bits long double (i.e. i386)" ${LD80BITS})
 option(NOALIGN "Set to ON if host device doesn't need re-align (i.e. i386)" ${NOALIGN})
 option(ARM_DYNAREC "Set to ON to use ARM Dynamic Recompilation" ${ARM_DYNAREC})
+option(RV64_DYNAREC "Set to ON to use RISC-V Dynamic Recompilation" ${RV64_DYNAREC})
 option(PAGE8K "Set to ON if host device have PageSize of 8K (instead of 4K)" ${PAGE8K})
 option(PAGE16K "Set to ON if host device have PageSize of 16K (instead of 4K)" ${PAGE16K})
 option(PAGE64K "Set to ON if host device have PageSize of 64K (instead of 4K)" ${PAGE64K})
@@ -208,6 +213,12 @@ if(ARM_DYNAREC)
     enable_language(ASM)
     include_directories("${BOX64_ROOT}/src/dynarec/arm64")
     set(DYNAREC ON)
+elseif(RV64_DYNAREC)
+    add_definitions(-DDYNAREC)
+    add_definitions(-DRV64)
+    enable_language(ASM)
+    include_directories("${BOX64_ROOT}/src/dynarec/rv64")
+    set(DYNAREC ON)
 else()
     set(DYNAREC OFF)
 endif()
@@ -575,6 +586,49 @@ if(ARM_DYNAREC)
     )
 endif()
 
+if(RV64_DYNAREC)
+    # RV64 dynarec: extend the dynarec source list with the RISC-V specific files
+    list(APPEND DYNAREC_SRC
+        # C sources
+        "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_functions.c"
+        "${BOX64_ROOT}/src/dynarec/rv64/rv64_printer.c"
+        "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_jmpnext.c"
+        # assembly stubs
+        "${BOX64_ROOT}/src/dynarec/rv64/rv64_prolog.S"
+        "${BOX64_ROOT}/src/dynarec/rv64/rv64_epilog.S"
+        "${BOX64_ROOT}/src/dynarec/rv64/rv64_next.S"
+        "${BOX64_ROOT}/src/dynarec/rv64/rv64_lock.S"
+    )
+    # Pass sources; the commented-out entries are files that are not part of this commit
+    set(DYNAREC_PASS
+        "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_helper.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_emit_tests.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_emit_math.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_emit_logic.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_emit_shift.c"
+        "${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_00.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_0f.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_64.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_65.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_66.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_67.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_d8.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_d9.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_da.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_db.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_dc.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_dd.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_de.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_df.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_f0.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_660f.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_6664.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_66f0.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_f20f.c"
+        #"${BOX64_ROOT}/src/dynarec/rv64/dynarec_rv64_f30f.c"
+    )
+endif()
+
 if(DYNAREC)
     set(DYNAREC_PASS 
         "${BOX64_ROOT}/src/wrapped/generated/wrapper.h" 
diff --git a/docs/COMPILE.md b/docs/COMPILE.md
index c40cbcea..919506ea 100755
--- a/docs/COMPILE.md
+++ b/docs/COMPILE.md
@@ -257,7 +257,7 @@ The `-DCMAKE_BUILD_TYPE=RelWithDebInfo` argument makes a build that is both opti
 

 #### To have a Trace Enabled build 

 

-To have a trace enabled build (***the interpreter will be slightly slower***), add `-DHAVE_TRACE=1`. But you will need to have the [Zydis library](https://github.com/zyantific/zydis) in your `LD_LIBRARY_PATH` or in the system library folders at runtime.

+To have a trace enabled build (***the interpreter will be slightly slower***), add `-DHAVE_TRACE=1`. But you will need to have the [Zydis library](https://github.com/zyantific/zydis) in your `LD_LIBRARY_PATH` or in the system library folders at runtime. Use version v3.2.1, as later versions changed the API and no longer work with box64.

 

 #### To have ARM Dynarec

 

diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index aa09934c..4d5d875f 100755
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -1753,3 +1753,65 @@ void emit_pf(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
     MVNw_REG(s4, s4);
     BFIw(xFlags, s4, F_PF, 1);
 }
+
+// Reload the FPU/NEON cache state (dyn->n) at instruction ninst; reset_n is the instruction whose saved cache serves as reference
+void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n)
+{
+    MESSAGE(LOG_DEBUG, "Reset Caches with %d\n",reset_n);
+    #if STEP > 1
+    // for STEP 2 & 3, just need to refresh with current, and undo the changes (push & swap)
+    dyn->n = dyn->insts[ninst].n;
+    neoncacheUnwind(&dyn->n);
+    #ifdef HAVE_TRACE
+    if(box64_dynarec_dump)  // debug dump only: report when the unwound cache differs from the one saved for reset_n
+        if(memcmp(&dyn->n, &dyn->insts[reset_n].n, sizeof(neon_cache_t))) {
+            MESSAGE(LOG_DEBUG, "Warning, difference in neoncache: reset=");
+            for(int i=0; i<24; ++i)
+                if(dyn->insts[reset_n].n.neoncache[i].v)
+                    MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->insts[reset_n].n.neoncache[i].t, dyn->insts[reset_n].n.neoncache[i].n));
+            if(dyn->insts[reset_n].n.combined1 || dyn->insts[reset_n].n.combined2)
+                MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->insts[reset_n].n.swapped?"SWP":"CMB", dyn->insts[reset_n].n.combined1, dyn->insts[reset_n].n.combined2);
+            if(dyn->insts[reset_n].n.stack_push || dyn->insts[reset_n].n.stack_pop)
+                MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->insts[reset_n].n.stack_push, -dyn->insts[reset_n].n.stack_pop);
+            MESSAGE(LOG_DEBUG, " ==> ");    // second part of the line: cache recorded for ninst itself
+            for(int i=0; i<24; ++i)
+                if(dyn->insts[ninst].n.neoncache[i].v)
+                    MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->insts[ninst].n.neoncache[i].t, dyn->insts[ninst].n.neoncache[i].n));
+            if(dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2)
+                MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->insts[ninst].n.swapped?"SWP":"CMB", dyn->insts[ninst].n.combined1, dyn->insts[ninst].n.combined2);
+            if(dyn->insts[ninst].n.stack_push || dyn->insts[ninst].n.stack_pop)
+                MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->insts[ninst].n.stack_push, -dyn->insts[ninst].n.stack_pop);
+            MESSAGE(LOG_DEBUG, " -> ");     // last part of the line: the unwound cache now held in dyn->n
+            for(int i=0; i<24; ++i)
+                if(dyn->n.neoncache[i].v)
+                    MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->n.neoncache[i].t, dyn->n.neoncache[i].n));
+            if(dyn->n.combined1 || dyn->n.combined2)
+                MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->n.swapped?"SWP":"CMB", dyn->n.combined1, dyn->n.combined2);
+            if(dyn->n.stack_push || dyn->n.stack_pop)
+                MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->n.stack_push, -dyn->n.stack_pop);
+            MESSAGE(LOG_DEBUG, "\n");
+        }
+    #endif //HAVE_TRACE
+    #else
+    dyn->n = dyn->insts[reset_n].n;     // STEP 0 & 1: simply take over the cache saved for reset_n
+    #endif
+}
+
+// Carry the ST stack state over to the next instruction, applying any stack pop that was deferred
+void fpu_propagate_stack(dynarec_arm_t* dyn, int ninst)
+{
+    for(int slot=0; dyn->n.stack_pop && slot<24; ++slot) {
+        // only entries tagged NEON_CACHE_ST_D / NEON_CACHE_ST_F are touched by a pop
+        if(dyn->n.neoncache[slot].t != NEON_CACHE_ST_D && dyn->n.neoncache[slot].t != NEON_CACHE_ST_F)
+            continue;
+        if(dyn->n.neoncache[slot].n >= dyn->n.stack_pop)
+            dyn->n.neoncache[slot].n -= dyn->n.stack_pop;   // still on the stack: renumber it
+        else
+            dyn->n.neoncache[slot].v = 0;                   // index below the pop count: drop the entry
+    }
+    dyn->n.stack_pop = 0;
+    dyn->n.stack = dyn->n.stack_next;
+    dyn->n.news = 0;
+    dyn->n.stack_push = 0;
+    dyn->n.swapped = 0;
+}
\ No newline at end of file
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index f13331ac..ba830b21 100755
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -969,6 +969,8 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define fpu_pushcache   STEPNAME(fpu_pushcache)
 #define fpu_popcache    STEPNAME(fpu_popcache)
 #define fpu_reset       STEPNAME(fpu_reset)
+#define fpu_reset_cache STEPNAME(fpu_reset_cache)
+#define fpu_propagate_stack STEPNAME(fpu_propagate_stack)
 #define fpu_purgecache  STEPNAME(fpu_purgecache)
 #define mmx_purgecache  STEPNAME(mmx_purgecache)
 #define x87_purgecache  STEPNAME(x87_purgecache)
@@ -1154,6 +1156,10 @@ void sse_purge07cache(dynarec_arm_t* dyn, int ninst, int s1);
 // common coproc helpers
 // reset the cache
 void fpu_reset(dynarec_arm_t* dyn);
+// reset the cache with n
+void fpu_reset_cache(dynarec_arm_t* dyn, int ninst, int reset_n);
+// propagate stack state
+void fpu_propagate_stack(dynarec_arm_t* dyn, int ninst);
 // purge the FPU cache (needs 3 scratch registers)
 void fpu_purgecache(dynarec_arm_t* dyn, int ninst, int next, int s1, int s2, int s3);
 // purge MMX cache
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index e5e98658..d1d30599 100755
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -18,6 +18,15 @@
 #define dynarec_native_t            dynarec_la464_t

 

 

+#elif defined(RV64)

+#include "rv64/rv64_printer.h"

+#include "rv64/dynarec_rv64_private.h"

+#include "rv64/dynarec_rv64_functions.h"

+

+#define instruction_native_t        instruction_rv64_t

+#define dynarec_native_t            dynarec_rv64_t

+

+

 #else

 #error Unsupported platform

 #endif

diff --git a/src/dynarec/dynarec_helper.h b/src/dynarec/dynarec_helper.h
index 19f48dcb..2f4b82fa 100755
--- a/src/dynarec/dynarec_helper.h
+++ b/src/dynarec/dynarec_helper.h
@@ -5,6 +5,8 @@
 #include "arm64/dynarec_arm64_helper.h"

 #elif defined(LA464)

 #include "la464/dynarec_la464_helper.h"

+#elif defined(RV64)

+#include "rv64/dynarec_rv64_helper.h"

 #else

 #error Unsupported architecture

 #endif

diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index c3f01ff2..06c4878c 100755
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -325,7 +325,7 @@ static void fillPredecessors(dynarec_native_t* dyn)
 }
 
 // updateNeed goes backward, from last intruction to top
-static int updateNeed(dynarec_arm_t* dyn, int ninst, uint8_t need) {
+static int updateNeed(dynarec_native_t* dyn, int ninst, uint8_t need) {
     while (ninst>=0) {
         // need pending but instruction is only a subset: remove pend and use an X_ALL instead
         need |= dyn->insts[ninst].x64.need_after;
diff --git a/src/dynarec/dynarec_native_pass.c b/src/dynarec/dynarec_native_pass.c
index ca8da160..14f85b40 100755
--- a/src/dynarec/dynarec_native_pass.c
+++ b/src/dynarec/dynarec_native_pass.c
@@ -60,44 +60,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
                 dyn->f.pending = 0;
                 fpu_reset(dyn);
             } else {
-                MESSAGE(LOG_DEBUG, "Reset Caches with %d\n",reset_n);
-                #if STEP > 1
-                // for STEP 2 & 3, just need to refrest with current, and undo the changes (push & swap)
-                dyn->n = dyn->insts[ninst].n;
-                neoncacheUnwind(&dyn->n);
-                #ifdef HAVE_TRACE
-                if(box64_dynarec_dump)
-                    if(memcmp(&dyn->n, &dyn->insts[reset_n].n, sizeof(neon_cache_t))) {
-                        MESSAGE(LOG_DEBUG, "Warning, difference in neoncache: reset=");
-                        for(int i=0; i<24; ++i)
-                            if(dyn->insts[reset_n].n.neoncache[i].v)
-                                MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->insts[reset_n].n.neoncache[i].t, dyn->insts[reset_n].n.neoncache[i].n));
-                        if(dyn->insts[reset_n].n.combined1 || dyn->insts[reset_n].n.combined2)
-                            MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->insts[reset_n].n.swapped?"SWP":"CMB", dyn->insts[reset_n].n.combined1, dyn->insts[reset_n].n.combined2);
-                        if(dyn->insts[reset_n].n.stack_push || dyn->insts[reset_n].n.stack_pop)
-                            MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->insts[reset_n].n.stack_push, -dyn->insts[reset_n].n.stack_pop);
-                        MESSAGE(LOG_DEBUG, " ==> ");
-                        for(int i=0; i<24; ++i)
-                            if(dyn->insts[ninst].n.neoncache[i].v)
-                                MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->insts[ninst].n.neoncache[i].t, dyn->insts[ninst].n.neoncache[i].n));
-                        if(dyn->insts[ninst].n.combined1 || dyn->insts[ninst].n.combined2)
-                            MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->insts[ninst].n.swapped?"SWP":"CMB", dyn->insts[ninst].n.combined1, dyn->insts[ninst].n.combined2);
-                        if(dyn->insts[ninst].n.stack_push || dyn->insts[ninst].n.stack_pop)
-                            MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->insts[ninst].n.stack_push, -dyn->insts[ninst].n.stack_pop);
-                        MESSAGE(LOG_DEBUG, " -> ");
-                        for(int i=0; i<24; ++i)
-                            if(dyn->n.neoncache[i].v)
-                                MESSAGE(LOG_DEBUG, " %02d:%s", i, getCacheName(dyn->n.neoncache[i].t, dyn->n.neoncache[i].n));
-                        if(dyn->n.combined1 || dyn->n.combined2)
-                            MESSAGE(LOG_DEBUG, " %s:%02d/%02d", dyn->n.swapped?"SWP":"CMB", dyn->n.combined1, dyn->n.combined2);
-                        if(dyn->n.stack_push || dyn->n.stack_pop)
-                            MESSAGE(LOG_DEBUG, " (%d:%d)", dyn->n.stack_push, -dyn->n.stack_pop);
-                        MESSAGE(LOG_DEBUG, "\n");
-                    }
-                #endif //HAVE_TRACE
-                #else
-                dyn->n = dyn->insts[reset_n].n;
-                #endif
+                fpu_reset_cache(dyn, ninst, reset_n);
                 dyn->f = dyn->insts[reset_n].f_exit;
                 if(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT) {
                     MESSAGE(LOG_DEBUG, "Apply Barrier Float\n");
@@ -112,21 +75,7 @@ uintptr_t native_pass(dynarec_native_t* dyn, uintptr_t addr)
             reset_n = -1;
         } else if(ninst && (dyn->insts[ninst].pred_sz!=1 || dyn->insts[ninst].pred[0]!=ninst-1))
             dyn->last_ip = 0;   // reset IP if some jump are comming here
-        // propagate ST stack state, especial stack pop that are defered
-        if(dyn->n.stack_pop) {
-            for(int j=0; j<24; ++j)
-                if((dyn->n.neoncache[j].t == NEON_CACHE_ST_D || dyn->n.neoncache[j].t == NEON_CACHE_ST_F)) {
-                    if(dyn->n.neoncache[j].n<dyn->n.stack_pop)
-                        dyn->n.neoncache[j].v = 0;
-                    else
-                        dyn->n.neoncache[j].n-=dyn->n.stack_pop;
-                }
-            dyn->n.stack_pop = 0;
-        }
-        dyn->n.stack = dyn->n.stack_next;
-        dyn->n.news = 0;
-        dyn->n.stack_push = 0;
-        dyn->n.swapped = 0;
+        fpu_propagate_stack(dyn, ninst);
         NEW_INST;
         if(dyn->insts[ninst].pred_sz>1) {SMSTART();}
         fpu_reset_scratch(dyn);
diff --git a/src/dynarec/dynarec_next.h b/src/dynarec/dynarec_next.h
index 19d426b8..0ad6c18d 100644
--- a/src/dynarec/dynarec_next.h
+++ b/src/dynarec/dynarec_next.h
@@ -15,6 +15,13 @@ void la464_epilog() EXPORTDYN;
 #define native_next         la464_next
 #define native_prolog       la464_prolog
 #define native_epilog       la464_epilog
+#elif defined(RV64)
+void rv64_next(void) EXPORTDYN;
+void rv64_prolog(x64emu_t* emu, void* addr) EXPORTDYN;
+void rv64_epilog() EXPORTDYN;
+#define native_next         rv64_next
+#define native_prolog       rv64_prolog
+#define native_epilog       rv64_epilog
 #else
 #error Unsupported architecture
 #endif
diff --git a/src/dynarec/native_lock.h b/src/dynarec/native_lock.h
index 32cf56fa..2134673e 100755
--- a/src/dynarec/native_lock.h
+++ b/src/dynarec/native_lock.h
@@ -27,6 +27,46 @@
 #define native_lock_decifnot0(A)            arm64_lock_decifnot0(A)

 #define native_lock_store(A, B)             arm64_lock_store(A, B)

 

+#elif defined(RV64)

+#include "rv64/rv64_lock.h"

+

+#define USE_CAS

+// RV64 is quite strict (or at least strongly recommends restraint) on what you can do between an LR (load-reserved) and an SC (store-conditional)

+// That basically forbids calling a function in between, so READ and WRITE cannot be separate steps

+// And so a Compare-and-Swap mechanism needs to be used instead

+

+// no byte or 2-bytes atomic access on RISC-V

+#define native_lock_xchg(A, B)              rv64_lock_xchg(A, B)

+#define native_lock_xchg_d(A, B)            rv64_lock_xchg_d(A, B)

+#define native_lock_storeifref(A, B, C)     rv64_lock_storeifref(A, B, C)

+#define native_lock_storeifref_d(A, B, C)   rv64_lock_storeifref_d(A, B, C)

+#define native_lock_storeifref2_d(A, B, C)  rv64_lock_storeifref2_d(A, B, C)

+#define native_lock_storeifnull(A, B)       rv64_lock_storeifnull(A, B)

+#define native_lock_storeifnull_d(A, B)     rv64_lock_storeifnull_d(A, B)

+#define native_lock_decifnot0b(A)           rv64_lock_decifnot0b(A)

+#define native_lock_storeb(A, B)            rv64_lock_storeb(A, B)

+#define native_lock_incif0(A)               rv64_lock_incif0(A)

+#define native_lock_decifnot0(A)            rv64_lock_decifnot0(A)

+#define native_lock_store(A, B)             rv64_lock_store(A, B)

+#define native_lock_cas_d(A, B, C)          rv64_lock_cas_d(A, B, C)

+#define native_lock_cas_dd(A, B, C)         rv64_lock_cas_dd(A, B, C)

+

+#define native_lock_xchg_b(A, B)            rv64_lock_xchg_b(A, B)

+#define native_lock_cas_b(A, B, C)          rv64_lock_cas_b(A, B, C)

+#define native_lock_cas_h(A, B, C)          rv64_lock_cas_h(A, B, C)

+

+#define native_lock_read_b(A)               tmpcas=*(uint8_t*)(A)

+#define native_lock_write_b(A, B)           rv64_lock_cas_b(A, tmpcas, B)

+#define native_lock_read_h(A)               tmpcas=*(uint16_t*)(A)

+#define native_lock_write_h(A, B)           rv64_lock_cas_h(A, tmpcas, B)

+#define native_lock_read_d(A)               tmpcas=*(uint32_t*)(A)

+#define native_lock_write_d(A, B)           rv64_lock_cas_d(A, tmpcas, B)

+#define native_lock_read_dd(A)              tmpcas=*(uint64_t*)(A)

+#define native_lock_write_dd(A, B)          rv64_lock_cas_dd(A, tmpcas, B)

+// there is no atomic move on 16bytes, so faking it

+#define native_lock_read_dq(A, B, C)        *A=tmpcas=((uint64_t*)(C))[0]; *B=((uint64_t*)(C))[1];

+#define native_lock_write_dq(A, B, C)       rv64_lock_cas_dq(C, A, tmpcas, B);

+

 #else

 #error Unsupported architecture

 #endif

diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
new file mode 100644
index 00000000..8fa698a0
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -0,0 +1,121 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <pthread.h>
+#include <errno.h>
+#include <signal.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "bridge.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "custommem.h"
+
+#include "rv64_printer.h"
+#include "dynarec_rv64_private.h"
+#include "dynarec_rv64_functions.h"
+#include "dynarec_rv64_helper.h"
+
+int isSimpleWrapper(wrapper_t fun);
+// Translate one x86-64 opcode (read at addr) into RV64 code; returns the address after the instruction. Unhandled opcodes go through DEFAULT.
+uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog)
+{
+    uint8_t nextop, opcode;
+    uint8_t gd, ed;
+    int8_t i8;
+    int32_t i32, tmp;
+    int64_t i64, j64;
+    uint8_t u8;
+    uint8_t gb1, gb2, eb1, eb2;
+    uint32_t u32;
+    uint64_t u64;
+    uint8_t wback, wb1, wb2, wb;
+    int64_t fixedaddress;
+    int lock;
+    int cacheupd = 0;
+
+    opcode = F8;
+    MAYUSE(eb1);
+    MAYUSE(eb2);
+    MAYUSE(j64);
+    MAYUSE(wb);
+    MAYUSE(lock);
+    MAYUSE(cacheupd);
+
+    switch(opcode) {
+
+        case 0x50:
+        case 0x51:
+        case 0x52:
+        case 0x53:
+        case 0x54:
+        case 0x55:
+        case 0x56:
+        case 0x57:
+            INST_NAME("PUSH reg");
+            gd = xRAX+(opcode&0x07)+(rex.b<<3);
+            SD(gd, xRSP, -8);       // store first, so PUSH RSP writes the value RSP had before the decrement
+            SUBI(xRSP, xRSP, 8);
+            break;
+
+        case 0x89:
+            INST_NAME("MOV Ed, Gd");
+            nextop=F8;
+            GETGD;
+            if(MODREG) {   // reg <= reg
+                MVxw(xRAX+(nextop&7)+(rex.b<<3), gd);
+            } else {                    // mem <= reg
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
+                SDxw(gd, ed, fixedaddress);
+                SMWRITELOCK(lock);
+            }
+            break;
+
+        case 0x8D:
+            INST_NAME("LEA Gd, Ed");
+            nextop=F8;
+            GETGD;
+            if(MODREG) {   // reg <= reg? that's an invalid operation
+                DEFAULT;
+            } else {                    // reg <= effective address
+                addr = geted(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0);
+                if(gd!=ed) {    // it's sometimes used as a 3 bytes NOP
+                    MV(gd, ed);
+                }
+                if(!rex.w) {    // not an "else": MV copies all 64 bits, so both paths need the truncation
+                    ZEROUP(gd);   //truncate the higher 32bits as asked
+                }
+            }
+            break;
+
+        case 0x58:
+        case 0x59:
+        case 0x5A:
+        case 0x5B:
+        case 0x5C:
+        case 0x5D:
+        case 0x5E:
+        case 0x5F:
+            INST_NAME("POP reg");
+            gd = xRAX+(opcode&0x07)+(rex.b<<3);
+            LD(gd, xRSP, 0);
+            if(gd!=xRSP) {      // POP RSP: the loaded value is the new RSP, don't adjust it
+                ADDI(xRSP, xRSP, 8);
+            }
+            break;
+
+        default:
+            DEFAULT;
+    }
+
+     return addr;
+}
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c
new file mode 100644
index 00000000..b26e1175
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_functions.c
@@ -0,0 +1,207 @@
+#define _GNU_SOURCE
+#include <stdio.h>
+#include <stdlib.h>
+#include <pthread.h>
+#include <errno.h>
+#include <string.h>
+#include <math.h>
+#include <signal.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "tools/bridge_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "emu/x87emu_private.h"
+#include "x64trace.h"
+#include "signals.h"
+#include "dynarec_rv64.h"
+#include "dynarec_rv64_private.h"
+#include "dynarec_rv64_functions.h"
+#include "custommem.h"
+#include "bridge.h"
+#include "rv64_lock.h"
+
+void fpu_reset_scratch(dynarec_rv64_t* dyn)    // placeholder: currently does nothing on RV64
+{
+    //TODO
+}
+
+#define F8      *(uint8_t*)(addr++)
+#define F32     *(uint32_t*)(addr+=4, addr-4)
+#define F32S64  (uint64_t)(int64_t)*(int32_t*)(addr+=4, addr-4)
+// Tell if ED is known to be aligned on 2^parity bytes (1) or not / unknown (0). Not emitting anything. Parity is 2 for DWORD or 3 for QWORD
+int getedparity(dynarec_rv64_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta)
+{
+    (void)dyn; (void)ninst;
+
+    uint32_t tested = (1<<parity)-1;    // mask of the low address bits that must be 0
+    if((nextop&0xC0)==0xC0)
+        return 0;   // direct register, no parity...
+    if(!(nextop&0xC0)) {    // mod==0: no displacement, except for the special encodings handled below
+        if((nextop&7)==4) {
+            uint8_t sib = F8;   // F8 reads the SIB byte and advances the local addr
+            int sib_reg = (sib>>3)&7;   // index field of the SIB (4 is tested below as "no index")
+            if((sib&0x7)==5) {  // base field 5 with mod==0: a 32-bit displacement follows instead of a base
+                uint64_t tmp = F32S64;  // sign-extended disp32, advances addr by 4
+                if (sib_reg!=4) {
+                    // if XXXXXX+reg<<N then check parity of XXXXX and N should be enough
+                    return ((tmp&tested)==0 && (sib>>6)>=parity)?1:0;
+                } else {
+                    // just a constant...
+                    return (tmp&tested)?0:1;
+                }
+            } else {
+                if(sib_reg==4 && parity<3)
+                    return 0;   // simple [reg]
+                // don't try [reg1 + reg2<<N], unless reg1 is ESP
+                return ((sib&0x7)==4 && (sib>>6)>=parity)?1:0;
+            }
+        } else if((nextop&7)==5) {  // disp32 relative to addr+delta (addr is already past the displacement here)
+            uint64_t tmp = F32S64;
+            tmp+=addr+delta;
+            return (tmp&tested)?0:1;
+        } else {
+            return 0;   // plain [reg]: nothing is known about the register content
+        }
+    } else {
+        return 0; //Form [reg1 + reg2<<N + XXXXXX]
+    }
+}
+
+// Skip the bytes following the ModRM byte (SIB, displacement) the way GETED would, without emitting anything
+uintptr_t fakeed(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop)
+{
+    (void)dyn; (void)ninst;
+
+    const uint8_t mod = nextop&0xC0;
+    const uint8_t rm = nextop&7;
+    // returns the address just past the operand encoding
+    switch(mod) {
+        case 0xC0:      // register operand: nothing to skip
+            return addr;
+        case 0x00:      // no displacement, but for the two special encodings
+            if(rm==4) {
+                uint8_t sib = *(uint8_t*)(addr++);
+                if((sib&0x7)==5)
+                    addr+=4;    // SIB base field 5: a disp32 follows
+            } else if(rm==5) {
+                addr+=4;        // rm 5: a disp32 follows
+            }
+            break;
+        default:        // mod 0x40 (disp8) or 0x80 (disp32)
+            if(rm==4)
+                ++addr;         // SIB byte
+            addr += (mod&0x80)?4:1;
+            break;
+    }
+    return addr;
+}
+#undef F8
+#undef F32
+
+int isNativeCall(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn)    // returns 1 if addr leads to a native bridge, 0 otherwise
+{
+    (void)dyn;
+    // PK / PK32: peek a byte / a signed 32-bit value at addr+a, without advancing addr
+#define PK(a)       *(uint8_t*)(addr+a)
+#define PK32(a)     *(int32_t*)(addr+a)
+    // give up on a null address or one getProtection() reports 0 for
+    if(!addr || !getProtection(addr))
+        return 0;
+    if(PK(0)==0xff && PK(1)==0x25) {            // "absolute" jump, maybe the GOT (well, RIP relative in fact)
+        uintptr_t a1 = addr + 6 + (PK32(2));    // need to add a check to see if the address is from the GOT !
+        addr = (uintptr_t)getAlternate(*(void**)a1);    // follow the jump: continue with its target
+    }
+    if(!addr || !getProtection(addr))
+        return 0;
+    onebridge_t *b = (onebridge_t*)(addr);
+    if(b->CC==0xCC && b->S=='S' && b->C=='C' && b->w!=(wrapper_t)0 && b->f!=(uintptr_t)PltResolver) {  // bridge signature, with a wrapper, and not the PltResolver
+        // found !
+        if(retn) *retn = (b->C3==0xC2)?b->N:0;  // 0xC2 is presumably x86 "RET imm16", N being its operand - TODO confirm
+        if(calladdress) *calladdress = addr+1;  // optional output: address just after the leading 0xCC byte
+        return 1;
+    }
+    return 0;
+#undef PK32
+#undef PK
+}
+
+// Tell whether instruction ninst is clean for a son branch (1 = clean, 0 = not)
+int isInstClean(dynarec_rv64_t* dyn, int ninst)
+{
+    // not clean when the flags cache has state on entry (dfnone or pending),
+    // or when the instruction carries x64 state_flags
+    int dirty = dyn->insts[ninst].f_entry.dfnone
+             || dyn->insts[ninst].f_entry.pending
+             || dyn->insts[ninst].x64.state_flags;
+    return dirty?0:1;
+}
+
+int isPred(dynarec_rv64_t* dyn, int ninst, int pred) {  // returns pred when it is listed as a predecessor of ninst, -1 otherwise
+    int idx = 0;
+    while(idx<dyn->insts[ninst].pred_sz && dyn->insts[ninst].pred[idx]!=pred)
+        ++idx;
+    return (idx<dyn->insts[ninst].pred_sz)?pred:-1;
+}
+int getNominalPred(dynarec_rv64_t* dyn, int ninst) {
+    // first instruction, or no predecessor recorded: nothing to return
+    if(!(ninst>0 && dyn->insts[ninst].pred_sz))
+        return -1;
+    // prefer the instruction just before, else fall back to the first recorded predecessor
+    return (isPred(dyn, ninst, ninst-1)!=-1)?(ninst-1):dyn->insts[ninst].pred[0];
+}
+
+uint8_t extract_byte(uint32_t val, void* address){  // byte of val selected by the low 2 bits of address (0 = least significant)
+    const unsigned lane = (unsigned)((uintptr_t)address&3);
+    return (uint8_t)(val>>(lane*8));
+}
+uint32_t insert_byte(uint32_t val, uint8_t b, void* address){  // returns val with the byte selected by the low 2 bits of address replaced by b
+    int idx = (((uintptr_t)address)&3)*8;
+    val&=~((uint32_t)0xff<<idx);    // unsigned mask: a plain 0xff<<24 overflows a signed int
+    val|=(((uint32_t)b)<<idx);
+    return val;
+}
+
+// only meaningful while the halfword fits in the 32-bit word: at byte offset 3 it would straddle two words
+uint16_t extract_half(uint32_t val, void* address){
+    const unsigned shift = (unsigned)((uintptr_t)address&3)*8;
+    return (uint16_t)(val>>shift);
+}
+uint32_t insert_half(uint32_t val, uint16_t h, void* address){ // returns val with the halfword at the byte offset given by address&3 replaced by h
+    int idx = (((uintptr_t)address)&3)*8;
+    val&=~((uint32_t)0xffff<<idx);  // unsigned mask: a plain 0xffff<<16 overflows a signed int
+    val|=(((uint32_t)h)<<idx);
+    return val;
+}
+
+uint8_t rv64_lock_xchg_b(void* addr, uint8_t val)   // swap the byte at addr with val, returning the previous byte
+{
+    uint32_t* const word = (uint32_t*)(((uintptr_t)addr)&~3);   // 32-bit aligned word containing the byte
+    uint32_t seen = *word;
+    // retry the word-wide CAS as long as it reports non-zero, re-reading the word each time
+    while(rv64_lock_cas_d(word, seen, insert_byte(seen, val, addr)))
+        seen = *word;
+    return extract_byte(seen, addr);
+}
+
+int rv64_lock_cas_b(void* addr, uint8_t ref, uint8_t val)  // store val at addr only if the byte there still equals ref; result is the one of rv64_lock_cas_d (0 = stored, as rv64_lock_xchg_b relies on)
+{
+    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);     // work on the 32-bit aligned word containing the byte
+    uint32_t tmp = *aligned;
+    return rv64_lock_cas_d(aligned, insert_byte(tmp, ref, addr), insert_byte(tmp, val, addr));  // expected word carries ref, so a mismatching byte makes the CAS fail
+}
+
+int rv64_lock_cas_h(void* addr, uint16_t ref, uint16_t val)    // store val at addr only if the halfword there still equals ref; result is the one of rv64_lock_cas_d. Halfword must not straddle the aligned word (byte offset 3)
+{
+    uint32_t* aligned = (uint32_t*)(((uintptr_t)addr)&~3);     // work on the 32-bit aligned word containing the halfword
+    uint32_t tmp = *aligned;
+    return rv64_lock_cas_d(aligned, insert_half(tmp, ref, addr), insert_half(tmp, val, addr));  // expected word carries ref, so a mismatching halfword makes the CAS fail
+}
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h
new file mode 100644
index 00000000..79ff47a3
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_functions.h
@@ -0,0 +1,26 @@
+#ifndef __DYNAREC_RV64_FUNCTIONS_H__
+#define __DYNAREC_RV64_FUNCTIONS_H__
+#include <stdint.h>
+
+typedef struct x64emu_s x64emu_t;
+typedef struct dynarec_rv64_s dynarec_rv64_t;
+
+// Reset scratch regs counter
+void fpu_reset_scratch(dynarec_rv64_t* dyn);
+
+// Check whether ED will have the correct parity, without emitting anything. Parity is 2 for DWORD or 3 for QWORD
+int getedparity(dynarec_rv64_t* dyn, int ninst, uintptr_t addr, uint8_t nextop, int parity, int delta);
+// Do the GETED, but don't emit anything...
+uintptr_t fakeed(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop);
+
+// Is what pointed at addr a native call? And if yes, to what function?
+int isNativeCall(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn);
+
+// predecessor access
+int isPred(dynarec_rv64_t* dyn, int ninst, int pred);
+int getNominalPred(dynarec_rv64_t* dyn, int ninst);
+
+// is inst clean for a son branch?
+int isInstClean(dynarec_rv64_t* dyn, int ninst);
+
+#endif //__DYNAREC_RV64_FUNCTIONS_H__
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
new file mode 100644
index 00000000..3dee0d98
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -0,0 +1,323 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <pthread.h>
+#include <errno.h>
+#include <assert.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_native.h"
+#include "../dynablock_private.h"
+#include "../tools/bridge_private.h"
+#include "custommem.h"
+
+#include "rv64_printer.h"
+#include "dynarec_rv64_private.h"
+#include "dynarec_rv64_functions.h"
+#include "dynarec_rv64_helper.h"
+
+/* setup r2 to address pointed by ED, also fixaddress is an optional delta, only non-zero when i12 is set and then in the range [-2048, +2047], to be added to ed by the caller for load/store */
+// Decodes the ModRM (and SIB/displacement) bytes found at addr for a memory operand.
+//  nextop:     the ModRM byte, already fetched by the caller
+//  ed:         out, register holding the address (may be a x64 register directly)
+//  hint:       register to build the address in when non-zero, else x2 is used
+//  scratch:    extra register available for intermediate values
+//  fixaddress: out, displacement left to the caller (0 when fully folded in ed)
+//  l:          NULL, LOCK_LOCK (register absolute/RIP relative addresses as lock addresses),
+//              or a pointer set to 1 when such an address is a known lock address (0 otherwise)
+//  i12:        non-zero when the caller can use a signed 12-bit displacement itself
+//  delta:      added to addr when computing RIP relative addresses
+// Returns addr advanced past the bytes consumed.
+uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int *l, int i12, int delta)
+{
+    MAYUSE(dyn); MAYUSE(ninst); MAYUSE(delta);
+
+    // 0: no lock handling, 1: record lock addresses, 2: query lock addresses
+    int lock = l?((l==LOCK_LOCK)?1:2):0;
+    if(lock==2)
+        *l = 0;
+    uint8_t ret = x2;
+    *fixaddress = 0;
+    if(hint>0) ret = hint;
+    MAYUSE(scratch);
+    if(!(nextop&0xC0)) {
+        // mod==0: no displacement, except for the two special encodings below
+        if((nextop&7)==4) {
+            // SIB byte follows
+            uint8_t sib = F8;
+            int sib_reg = ((sib>>3)&7)+(rex.x<<3);
+            if((sib&0x7)==5) {
+                // no base register: disp32 (+ scaled index if any)
+                int64_t tmp = F32S;
+                if (sib_reg!=4) {
+                    if(tmp && ((tmp<-2048) || (tmp>2047) || !i12)) {
+                        // displacement cannot be left to the caller, add it here
+                        MOV64x(scratch, tmp);
+                        SLLI(ret, xRAX+sib_reg, (sib>>6));
+                        ADD(ret, ret, scratch);
+                    } else {
+                        SLLI(ret, xRAX+sib_reg, (sib>>6));
+                        *fixaddress = tmp;
+                    }
+                } else {
+                    // no index either: absolute address
+                    switch(lock) {
+                        case 1: addLockAddress(tmp); break;
+                        case 2: if(isLockAddress(tmp)) *l=1; break;
+                    }
+                    MOV64x(ret, tmp);
+                }
+            } else {
+                if (sib_reg!=4) {
+                    // base + scaled index
+                    SLLI(scratch, xRAX+sib_reg, (sib>>6));
+                    ADD(ret, xRAX+(sib&0x7)+(rex.b<<3), scratch);
+                } else {
+                    // base only: use the x64 register directly
+                    ret = xRAX+(sib&0x7)+(rex.b<<3);
+                }
+            }
+        } else if((nextop&7)==5) {
+            // RIP relative: disp32 added to addr+delta
+            int64_t tmp = F32S64;
+            if(i12 && (tmp>=-2048) && (tmp<=2047)) {
+                GETIP(addr+delta);
+                ret = xRIP;
+                *fixaddress = tmp;
+            } else if((tmp>=-2048) && (tmp<=2047)) {
+                GETIP(addr+delta);
+                ADDI(ret, xRIP, tmp);
+            } else if(tmp+addr+delta<0x100000000LL) {
+                MOV64x(ret, tmp+addr+delta);
+            } else {
+                MOV64x(ret, tmp);
+                GETIP(addr+delta);
+                ADD(ret, ret, xRIP);
+            }
+            switch(lock) {
+                case 1: addLockAddress(addr+delta+tmp); break;
+                case 2: if(isLockAddress(addr+delta+tmp)) *l=1; break;
+            }
+        } else {
+            // plain [reg]
+            ret = xRAX+(nextop&7)+(rex.b<<3);
+        }
+    } else {
+        // mod==1 or mod==2: base (+ scaled index) + disp8/disp32
+        int64_t i64;
+        uint8_t sib = 0;
+        int sib_reg = 0;
+        if((nextop&7)==4) {
+            sib = F8;
+            sib_reg = ((sib>>3)&7)+(rex.x<<3);
+        }
+        // bit 7 of nextop set: 32-bit displacement, else 8-bit (both sign-extended)
+        if(nextop&0x80)
+            i64 = F32S;
+        else 
+            i64 = F8S;
+        if(i64==0 || ((i64>=-2048) && (i64<=2047)  && i12)) {
+            // displacement is left to the caller through fixaddress
+            *fixaddress = i64;
+            if((nextop&7)==4) {
+                if (sib_reg!=4) {
+                    SLLI(scratch, xRAX+sib_reg, (sib>>6));
+                    ADD(ret, xRAX+(sib&0x07)+(rex.b<<3), scratch);
+                } else {
+                    ret = xRAX+(sib&0x07)+(rex.b<<3);
+                }
+            } else
+                ret = xRAX+(nextop&0x07)+(rex.b<<3);
+        } else {
+            if(i64>=-2048 && i64<=2047) {
+                // displacement fits an ADDI immediate
+                if((nextop&7)==4) {
+                    if (sib_reg!=4) {
+                        SLLI(scratch, xRAX+sib_reg, (sib>>6));
+                        ADD(scratch, xRAX+(sib&0x07)+(rex.b<<3), scratch);
+                    } else {
+                        scratch = xRAX+(sib&0x07)+(rex.b<<3);
+                    }
+                } else
+                    scratch = xRAX+(nextop&0x07)+(rex.b<<3);
+                ADDI(ret, scratch, i64);
+            } else {
+                // large displacement: load it in scratch first
+                MOV64x(scratch, i64);
+                if((nextop&7)==4) {
+                    if (sib_reg!=4) {
+                        ADD(scratch, scratch, xRAX+(sib&0x07)+(rex.b<<3));
+                        SLLI(ret, xRAX+sib_reg, (sib>>6));
+                        ADD(ret, scratch, ret);
+                    } else {
+                        PASS3(int tmp = xRAX+(sib&0x07)+(rex.b<<3));
+                        ADD(ret, tmp, scratch);
+                    }
+                } else {
+                    PASS3(int tmp = xRAX+(nextop&0x07)+(rex.b<<3));
+                    ADD(ret, tmp, scratch);
+                }
+            }
+        }
+    }
+    *ed = ret;
+    return addr;
+}
+
+// Emit the jump to rv64_epilog. xRIP is first set from reg when reg is
+// non-zero (nothing to move if reg is already xRIP), else from the constant ip.
+void jump_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst)
+{
+    MAYUSE(dyn); MAYUSE(ip); MAYUSE(ninst);
+    MESSAGE(LOG_DUMP, "Jump to epilog\n");
+
+    if(!reg) {
+        // no register given: xRIP comes from the constant ip
+        GETIP_(ip);
+    } else if(reg!=xRIP) {
+        MV(xRIP, reg);
+    }
+    TABLE64(x2, (uintptr_t)rv64_epilog);
+    SMEND();
+    BR(x2);
+}
+
+// Emit the jump to the next block.
+//  reg!=0: the target is in reg (copied to xRIP), and the 4-level jump table
+//          returned by getJumpTable64() is walked at run time;
+//  reg==0: the target is the constant ip, and the table slot address is
+//          resolved now with getJumpTableAddress64(ip).
+// In both cases the target ends in A1, x2 holds the address to jump to, and
+// the tracked last_ip is cleared before the JALR.
+void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst)
+{
+    MAYUSE(dyn); MAYUSE(ninst);
+    MESSAGE(LOG_DUMP, "Jump to next\n");
+
+    if(reg) {
+        if(reg!=xRIP) {
+            MV(xRIP, reg);
+        }
+        uintptr_t tbl = getJumpTable64();
+        MAYUSE(tbl);
+        TABLE64(x3, tbl);
+        // Each level: index = (xRIP>>START)&MASK, entry = 8-bytes at table+index*8.
+        // The scaled index is added to the table pointer explicitly and the
+        // load uses a 0 offset, as the load offset is an immediate, not a register.
+        // level 3 (uppermost bits, no mask applied)
+        SRLI(x2, xRIP, JMPTABL_START3);
+        SLLI(x2, x2, 3);
+        ADD(x3, x3, x2);
+        LD_I12(x3, x3, 0);
+        // level 2. The mask is loaded as a plain constant: LUI would place it
+        // in bits 12 and up and mask the wrong bits
+        MOV64x(x4, JMPTABLE_MASK2);    // x4 = mask
+        SRLI(x2, xRIP, JMPTABL_START2);
+        AND(x2, x2, x4);
+        SLLI(x2, x2, 3);
+        ADD(x3, x3, x2);
+        LD_I12(x3, x3, 0);
+        // level 1
+        if(JMPTABLE_MASK2!=JMPTABLE_MASK1) {
+            MOV64x(x4, JMPTABLE_MASK1);    // x4 = mask
+        }
+        SRLI(x2, xRIP, JMPTABL_START1);
+        AND(x2, x2, x4);
+        SLLI(x2, x2, 3);
+        ADD(x3, x3, x2);
+        LD_I12(x3, x3, 0);
+        // level 0: index is taken from the low bits of xRIP itself (x2 still
+        // holds the scaled level 1 index at this point and must not be reused)
+        if(JMPTABLE_MASK1!=JMPTABLE_MASK0) {
+            MOV64x(x4, JMPTABLE_MASK0);    // x4 = mask
+        }
+        AND(x2, xRIP, x4);
+        SLLI(x2, x2, 3);
+        ADD(x3, x3, x2);
+        LD_I12(x2, x3, 0);
+    } else {
+        uintptr_t p = getJumpTableAddress64(ip);
+        MAYUSE(p);
+        TABLE64(x3, p);
+        GETIP_(ip);
+        LD(x2, x3, 0);
+    }
+    if(reg!=A1) {
+        MV(A1, xRIP);
+    }
+    CLEARIP();
+    #ifdef HAVE_TRACE
+    //MOVx(x3, 15);    no access to PC reg 
+    #endif
+    SMEND();
+    JALR(x2); // save LR...
+}
+
+// FPU / MMX / x87 cache helpers: none of them is implemented yet for RV64,
+// every body below is an empty placeholder.
+
+// reset the cache (placeholder, does nothing yet)
+void fpu_reset(dynarec_rv64_t* dyn)
+{
+    //TODO
+}
+
+// reset the cache with n (placeholder, does nothing yet)
+void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n)
+{
+    //TODO
+}
+
+// purge the FPU cache, s1..s3 being scratch registers (placeholder, does nothing yet)
+void fpu_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, int s3)
+{
+    //TODO
+}
+
+// propagate ST stack state, especially stack pops that are deferred (placeholder, does nothing yet)
+void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst)
+{
+    //TODO
+}
+
+// purge MMX cache (placeholder, does nothing yet)
+void mmx_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1)
+{
+    // TODO
+}
+
+// purge x87 cache (placeholder, does nothing yet)
+void x87_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, int s3)
+{
+    //TODO
+}
+
+#ifdef HAVE_TRACE
+// only built with HAVE_TRACE (placeholder, does nothing yet)
+void fpu_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
+{
+    //TODO
+}
+#endif
+// placeholder, does nothing yet
+void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
+{
+    //TODO
+}
+// placeholder, does nothing yet
+void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
+{
+    //TODO
+}
+
+// Emit the code loading the 32-bit constant val in reg: LUI for the upper
+// 20 bits, then ADDI for the lower 12 bits when they are not null.
+void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val)
+{
+    int32_t up = (val>>12);
+    int32_t r = val&0xfff;
+    // ADDI sign-extends its 12-bit immediate: when bit 11 is set, bump the
+    // upper part by one and use the (negative) remainder instead
+    if(r&0x800) {
+        ++up;
+        r -= 0x1000;
+    }
+    LUI(reg, up);
+    if(r) {
+        ADDI(reg, reg, r);
+    }
+    // For val in [0x7ffff800, 0x7fffffff] the bump pushes up to 0x80000: on
+    // RV64, LUI sign-extends bit 31, so reg would end with its upper 32 bits
+    // set for a positive val. Clear them (same ZEROUP as rv64_move64 uses).
+    if(up==0x80000) {
+        ZEROUP(reg);
+    }
+}
+// Emit the code loading the 64-bit constant val in reg, using the shortest of:
+//  - a single 12-bit immediate move,
+//  - rv64_move32 when val is a sign-extended 32-bit value,
+//  - rv64_move32 + ZEROUP when val is a 32-bit value with bit 31 set,
+//  - else rv64_move32 of the upper 32 bits, then the lower 32 bits shifted
+//    in by chunks of 11, 11 and 10 bits (null chunks only add to the shift).
+void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val)
+{
+    // value that fits in 12 bits, sign included
+    if(((val<<(64-12))>>(64-12))==val) {
+        MOV_U12(reg, (val&0xfff));
+        return;
+    }
+    // value that fits in 32 bits, sign included
+    if(((val<<32)>>32)==val) {
+        rv64_move32(dyn, ninst, reg, val);
+        return;
+    }
+    // unsigned 32-bit value with bit 31 set: upper bits need clearing afterward
+    if((val&0xffffffffLL)==val && (val&0x80000000)) {
+        rv64_move32(dyn, ninst, reg, val);
+        ZEROUP(reg);
+        return;
+    }
+    //TODO: optimize that later
+    // generic case: upper 32 bits first...
+    rv64_move32(dyn, ninst, reg, val>>32);
+    // ...then the lower 32 bits, most significant chunk first
+    const uint32_t low = val&0xffffffff;
+    const uint32_t chunk[3] = { (low>>21)&0x7ff, (low>>10)&0x7ff, low&0x3ff };
+    const int width[3] = { 11, 11, 10 };
+    int pending = 0;    // shift amount not emitted yet
+    for(int i=0; i<3; ++i) {
+        pending += width[i];
+        if(chunk[i]) {
+            SLLI(reg, reg, pending);
+            ORI(reg, reg, chunk[i]);
+            pending = 0;
+        }
+    }
+    if(pending) {
+        SLLI(reg, reg, pending);
+    }
+}
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
new file mode 100644
index 00000000..2add1717
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -0,0 +1,490 @@
+#ifndef __DYNAREC_RV64_HELPER_H__
+#define __DYNAREC_RV64_HELPER_H__
+
+// undef to get Close to SSE Float->int conversions
+//#define PRECISE_CVT
+
+#if STEP == 0
+#include "dynarec_rv64_pass0.h"
+#elif STEP == 1
+#include "dynarec_rv64_pass1.h"
+#elif STEP == 2
+#include "dynarec_rv64_pass2.h"
+#elif STEP == 3
+#include "dynarec_rv64_pass3.h"
+#endif
+
+#include "debug.h"
+#include "rv64_emitter.h"
+#include "../emu/x64primop.h"
+
+// Fetch helpers: read a value of the given size at addr and advance addr past
+// it (the S variants read it as a signed value)
+#define F8      *(uint8_t*)(addr++)
+#define F8S     *(int8_t*)(addr++)
+#define F16     *(uint16_t*)(addr+=2, addr-2)
+#define F16S    *(int16_t*)(addr+=2, addr-2)
+#define F32     *(uint32_t*)(addr+=4, addr-4)
+#define F32S    *(int32_t*)(addr+=4, addr-4)
+// signed 32-bit fetch, sign-extended to 64 bits
+#define F32S64  (uint64_t)(int64_t)F32S
+#define F64     *(uint64_t*)(addr+=8, addr-8)
+// Peek helpers: read at addr+a (ip+a for PKip) without advancing
+#define PK(a)   *(uint8_t*)(addr+a)
+#define PK16(a)   *(uint16_t*)(addr+a)
+#define PK32(a)   *(uint32_t*)(addr+a)
+#define PK64(a)   *(uint64_t*)(addr+a)
+#define PKip(a)   *(uint8_t*)(ip+a)
+
+
+// Strong mem emulation helpers
+// Sequence of Read will trigger a DMB on "first" read if strongmem is 2
+// Sequence of Write will trigger a DMB on "last" write if strongmem is 1
+// Opcode will read
+#define SMREAD()    if(!dyn->smread && box64_dynarec_strongmem>1) {SMDMB();}
+// Opcode will read with option forced lock
+#define SMREADLOCK(lock)    if(lock || (!dyn->smread && box64_dynarec_strongmem>1)) {SMDMB();}
+// Opcode might read (depends on nextop)
+#define SMMIGHTREAD()   if(!MODREG) {SMREAD();}
+// Opcode has wrote
+#define SMWRITE()   dyn->smwrite=1
+// Opcode has wrote (strongmem>1 only)
+#define SMWRITE2()   if(box64_dynarec_strongmem>1) dyn->smwrite=1
+// Opcode has wrote with option forced lock
+#define SMWRITELOCK(lock)   if(lock) {SMDMB();} else dyn->smwrite=1
+// Opcode might have written (depends on nextop)
+#define SMMIGHTWRITE()   if(!MODREG) {SMWRITE();}
+// Start of sequence
+#define SMSTART()   SMEND()
+// End of sequence
+#define SMEND()     if(dyn->smwrite && box64_dynarec_strongmem) {FENCE();} dyn->smwrite=0; dyn->smread=0;
+// Force a Data memory barrier (for LOCK: prefix)
+#define SMDMB()     FENCE(); dyn->smwrite=0; dyn->smread=1
+
+//LOCK_* define
+#define LOCK_LOCK   (int*)1
+
+// GETGD    get x64 register in gd
+#define GETGD   gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3)
+//GETED can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
+// Register operand (MODREG): ed is the x64 register itself.
+// Memory operand: geted() is called with x2 as hint, x1 as scratch and i12
+// enabled, then the value at wback+fixedaddress is loaded in x1, and ed is x1.
+// D is passed as the delta argument of geted().
+#define GETED(D)  if(MODREG) {                          \
+                    ed = xRAX+(nextop&7)+(rex.b<<3);    \
+                    wback = 0;                          \
+                } else {                                \
+                    SMREAD()                            \
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \
+                    LD_I12(x1, wback, fixedaddress);    \
+                    ed = x1;                            \
+                }
+                
+#ifndef MAYSETFLAGS
+#define MAYSETFLAGS()
+#endif
+
+#ifndef READFLAGS
+#define READFLAGS(A) \
+
+#endif
+
+#ifndef SETFLAGS
+#define SETFLAGS(A, B)                                                                          \
+
+#endif
+#ifndef JUMP
+#define JUMP(A, C) 
+#endif
+#ifndef BARRIER
+#define BARRIER(A) 
+#endif
+#ifndef BARRIER_NEXT
+#define BARRIER_NEXT(A)
+#endif
+
+#ifndef DEFAULT
+#define DEFAULT      *ok = -1; BARRIER(2)
+#endif
+
+#ifndef TABLE64
+#define TABLE64(A, V)
+#endif
+
+#if STEP < 2
+#define GETIP(A)
+#define GETIP_(A)
+#else
+// put value in the Table64 even if not using it for now to avoid difference between Step2 and Step3. Needs to be optimized later...
+// GETIP sets xRIP to A and remembers it in dyn->last_ip; GETIP_ sets xRIP the same way but leaves dyn->last_ip untouched.
+// When A is close after the last known value, xRIP is only adjusted with an ADDI: the ADDI immediate is a
+// signed 12-bit value, so only deltas below 0x800 can be used (0x800..0xfff would be taken as negative).
+#define GETIP(A)                                        \
+    if(dyn->last_ip && ((A)-dyn->last_ip)<0x800) {      \
+        uint64_t _delta_ip = (A)-dyn->last_ip;          \
+        dyn->last_ip += _delta_ip;                      \
+        if(_delta_ip) {                                 \
+            ADDI(xRIP, xRIP, _delta_ip);                \
+        }                                               \
+    } else {                                            \
+        dyn->last_ip = (A);                             \
+        if(dyn->last_ip<0xffffffff) {                   \
+            MOV64x(xRIP, dyn->last_ip);                 \
+        } else                                          \
+            TABLE64(xRIP, dyn->last_ip);                \
+    }
+#define GETIP_(A)                                       \
+    if(dyn->last_ip && ((A)-dyn->last_ip)<0x800) {      \
+        uint64_t _delta_ip = (A)-dyn->last_ip;          \
+        if(_delta_ip) {ADDI(xRIP, xRIP, _delta_ip);}    \
+    } else {                                            \
+        if((A)<0xffffffff) {                            \
+            MOV64x(xRIP, (A));                          \
+        } else                                          \
+            TABLE64(xRIP, (A));                         \
+    }
+#endif
+#define CLEARIP()   dyn->last_ip=0
+
+
+#define MODREG  ((nextop&0xC0)==0xC0)
+
+void rv64_epilog();
+void* rv64_next(x64emu_t* emu, uintptr_t addr);
+
+#ifndef STEPNAME
+#define STEPNAME3(N,M) N##M
+#define STEPNAME2(N,M) STEPNAME3(N,M)
+#define STEPNAME(N) STEPNAME2(N, STEP)
+#endif
+
+#define native_pass        STEPNAME(native_pass)
+
+#define dynarec64_00       STEPNAME(dynarec64_00)
+#define dynarec64_0F       STEPNAME(dynarec64_0F)
+#define dynarec64_64       STEPNAME(dynarec64_64)
+#define dynarec64_65       STEPNAME(dynarec64_65)
+#define dynarec64_66       STEPNAME(dynarec64_66)
+#define dynarec64_67       STEPNAME(dynarec64_67)
+#define dynarec64_D8       STEPNAME(dynarec64_D8)
+#define dynarec64_D9       STEPNAME(dynarec64_D9)
+#define dynarec64_DA       STEPNAME(dynarec64_DA)
+#define dynarec64_DB       STEPNAME(dynarec64_DB)
+#define dynarec64_DC       STEPNAME(dynarec64_DC)
+#define dynarec64_DD       STEPNAME(dynarec64_DD)
+#define dynarec64_DE       STEPNAME(dynarec64_DE)
+#define dynarec64_DF       STEPNAME(dynarec64_DF)
+#define dynarec64_F0       STEPNAME(dynarec64_F0)
+#define dynarec64_660F     STEPNAME(dynarec64_660F)
+#define dynarec64_6664     STEPNAME(dynarec64_6664)
+#define dynarec64_66F0     STEPNAME(dynarec64_66F0)
+#define dynarec64_F20F     STEPNAME(dynarec64_F20F)
+#define dynarec64_F30F     STEPNAME(dynarec64_F30F)
+
+#define geted           STEPNAME(geted)
+#define geted32         STEPNAME(geted32)
+#define geted16         STEPNAME(geted16)
+#define jump_to_epilog  STEPNAME(jump_to_epilog)
+#define jump_to_next    STEPNAME(jump_to_next)
+#define ret_to_epilog   STEPNAME(ret_to_epilog)
+#define retn_to_epilog  STEPNAME(retn_to_epilog)
+#define iret_to_epilog  STEPNAME(iret_to_epilog)
+#define call_c          STEPNAME(call_c)
+#define call_n          STEPNAME(call_n)
+#define grab_segdata    STEPNAME(grab_segdata)
+#define emit_cmp8       STEPNAME(emit_cmp8)
+#define emit_cmp16      STEPNAME(emit_cmp16)
+#define emit_cmp32      STEPNAME(emit_cmp32)
+#define emit_cmp8_0     STEPNAME(emit_cmp8_0)
+#define emit_cmp16_0    STEPNAME(emit_cmp16_0)
+#define emit_cmp32_0    STEPNAME(emit_cmp32_0)
+#define emit_test8      STEPNAME(emit_test8)
+#define emit_test16     STEPNAME(emit_test16)
+#define emit_test32     STEPNAME(emit_test32)
+#define emit_add32      STEPNAME(emit_add32)
+#define emit_add32c     STEPNAME(emit_add32c)
+#define emit_add8       STEPNAME(emit_add8)
+#define emit_add8c      STEPNAME(emit_add8c)
+#define emit_sub32      STEPNAME(emit_sub32)
+#define emit_sub32c     STEPNAME(emit_sub32c)
+#define emit_sub8       STEPNAME(emit_sub8)
+#define emit_sub8c      STEPNAME(emit_sub8c)
+#define emit_or32       STEPNAME(emit_or32)
+#define emit_or32c      STEPNAME(emit_or32c)
+#define emit_xor32      STEPNAME(emit_xor32)
+#define emit_xor32c     STEPNAME(emit_xor32c)
+#define emit_and32      STEPNAME(emit_and32)
+#define emit_and32c     STEPNAME(emit_and32c)
+#define emit_or8        STEPNAME(emit_or8)
+#define emit_or8c       STEPNAME(emit_or8c)
+#define emit_xor8       STEPNAME(emit_xor8)
+#define emit_xor8c      STEPNAME(emit_xor8c)
+#define emit_and8       STEPNAME(emit_and8)
+#define emit_and8c      STEPNAME(emit_and8c)
+#define emit_add16      STEPNAME(emit_add16)
+#define emit_add16c     STEPNAME(emit_add16c)
+#define emit_sub16      STEPNAME(emit_sub16)
+#define emit_sub16c     STEPNAME(emit_sub16c)
+#define emit_or16       STEPNAME(emit_or16)
+#define emit_or16c      STEPNAME(emit_or16c)
+#define emit_xor16      STEPNAME(emit_xor16)
+#define emit_xor16c     STEPNAME(emit_xor16c)
+#define emit_and16      STEPNAME(emit_and16)
+#define emit_and16c     STEPNAME(emit_and16c)
+#define emit_inc32      STEPNAME(emit_inc32)
+#define emit_inc16      STEPNAME(emit_inc16)
+#define emit_inc8       STEPNAME(emit_inc8)
+#define emit_dec32      STEPNAME(emit_dec32)
+#define emit_dec16      STEPNAME(emit_dec16)
+#define emit_dec8       STEPNAME(emit_dec8)
+#define emit_adc32      STEPNAME(emit_adc32)
+#define emit_adc32c     STEPNAME(emit_adc32c)
+#define emit_adc8       STEPNAME(emit_adc8)
+#define emit_adc8c      STEPNAME(emit_adc8c)
+#define emit_adc16      STEPNAME(emit_adc16)
+#define emit_adc16c     STEPNAME(emit_adc16c)
+#define emit_sbb32      STEPNAME(emit_sbb32)
+#define emit_sbb32c     STEPNAME(emit_sbb32c)
+#define emit_sbb8       STEPNAME(emit_sbb8)
+#define emit_sbb8c      STEPNAME(emit_sbb8c)
+#define emit_sbb16      STEPNAME(emit_sbb16)
+#define emit_sbb16c     STEPNAME(emit_sbb16c)
+#define emit_neg32      STEPNAME(emit_neg32)
+#define emit_neg16      STEPNAME(emit_neg16)
+#define emit_neg8       STEPNAME(emit_neg8)
+#define emit_shl32      STEPNAME(emit_shl32)
+#define emit_shl32c     STEPNAME(emit_shl32c)
+#define emit_shr32      STEPNAME(emit_shr32)
+#define emit_shr32c     STEPNAME(emit_shr32c)
+#define emit_sar32c     STEPNAME(emit_sar32c)
+#define emit_rol32c     STEPNAME(emit_rol32c)
+#define emit_ror32c     STEPNAME(emit_ror32c)
+#define emit_shrd32c    STEPNAME(emit_shrd32c)
+#define emit_shld32c    STEPNAME(emit_shld32c)
+
+#define emit_pf         STEPNAME(emit_pf)
+
+#define x87_do_push     STEPNAME(x87_do_push)
+#define x87_do_push_empty STEPNAME(x87_do_push_empty)
+#define x87_do_pop      STEPNAME(x87_do_pop)
+#define x87_get_current_cache   STEPNAME(x87_get_current_cache)
+#define x87_get_cache   STEPNAME(x87_get_cache)
+#define x87_get_neoncache STEPNAME(x87_get_neoncache)
+#define x87_get_st      STEPNAME(x87_get_st)
+#define x87_get_st_empty  STEPNAME(x87_get_st_empty)
+#define x87_refresh     STEPNAME(x87_refresh)
+#define x87_forget      STEPNAME(x87_forget)
+#define x87_reget_st    STEPNAME(x87_reget_st)
+#define x87_stackcount  STEPNAME(x87_stackcount)
+#define x87_swapreg     STEPNAME(x87_swapreg)
+#define x87_setround    STEPNAME(x87_setround)
+#define x87_restoreround STEPNAME(x87_restoreround)
+#define sse_setround    STEPNAME(sse_setround)
+#define mmx_get_reg     STEPNAME(mmx_get_reg)
+#define mmx_get_reg_empty STEPNAME(mmx_get_reg_empty)
+#define sse_get_reg     STEPNAME(sse_get_reg)
+#define sse_get_reg_empty STEPNAME(sse_get_reg_empty)
+#define sse_forget_reg   STEPNAME(sse_forget_reg)
+#define sse_purge07cache STEPNAME(sse_purge07cache)
+
+#define fpu_pushcache   STEPNAME(fpu_pushcache)
+#define fpu_popcache    STEPNAME(fpu_popcache)
+#define fpu_reset       STEPNAME(fpu_reset)
+#define fpu_reset_cache STEPNAME(fpu_reset_cache)
+#define fpu_propagate_stack STEPNAME(fpu_propagate_stack)
+#define fpu_purgecache  STEPNAME(fpu_purgecache)
+#define mmx_purgecache  STEPNAME(mmx_purgecache)
+#define x87_purgecache  STEPNAME(x87_purgecache)
+#ifdef HAVE_TRACE
+#define fpu_reflectcache STEPNAME(fpu_reflectcache)
+#endif
+
+#define CacheTransform       STEPNAME(CacheTransform)
+#define rv64_move64     STEPNAME(rv64_move64)
+#define rv64_move32     STEPNAME(rv64_move32)
+
+/* setup r2 to address pointed by */
+uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int* l, int i12, int delta);
+
+/* setup r2 to address pointed by */
+//uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, rex_t rex, int* l, int s, int delta);
+
+/* setup r2 to address pointed by */
+//uintptr_t geted16(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, int64_t* fixaddress, int absmax, uint32_t mask, int s);
+
+
+// generic x64 helper
+void jump_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst);
+void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst);
+//void ret_to_epilog(dynarec_rv64_t* dyn, int ninst);
+//void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, int n);
+//void iret_to_epilog(dynarec_rv64_t* dyn, int ninst, int is64bits);
+//void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
+//void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w);
+//void grab_segdata(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, int reg, int segment);
+//void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+//void emit_cmp16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+//void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
+//void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+//void emit_cmp16_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+//void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
+//void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+//void emit_test16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+//void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+//void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+//void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5);
+//void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+//void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5);
+//void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_sub8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5);
+//void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+//void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
+//void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+//void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
+//void emit_and32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+//void emit_and32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
+//void emit_or8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_or8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_xor8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_xor8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_and8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_and8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_add16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_sub16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_sub16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_or16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_or16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_xor16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_and16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
+//void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+//void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+//void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
+//void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+//void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+//void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+//void emit_adc32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_adc8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5);
+//void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_adc16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+//void emit_sbb32c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5);
+//void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+//void emit_sbb16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+//void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
+//void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+//void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+//void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+//void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+//void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
+//void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+//void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+//void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+//void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+//void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
+//void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
+
+//void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+
+// x87 helper
+// cache of the local stack counter, to avoid an update at every call
+//void x87_stackcount(dynarec_rv64_t* dyn, int ninst, int scratch);
+// fpu push. Return the Dd value to be used
+//int x87_do_push(dynarec_rv64_t* dyn, int ninst, int s1, int t);
+// fpu push. Do not allocate a cache register. Needs a scratch register to do x87stack synch (or 0 to not do it)
+//void x87_do_push_empty(dynarec_rv64_t* dyn, int ninst, int s1);
+// fpu pop. All previous returned Dd should be considered invalid
+//void x87_do_pop(dynarec_rv64_t* dyn, int ninst, int s1);
+// get cache index for a x87 reg, return -1 if cache doesn't exist
+//int x87_get_current_cache(dynarec_rv64_t* dyn, int ninst, int st, int t);
+// get cache index for a x87 reg, create the entry if needed
+//int x87_get_cache(dynarec_rv64_t* dyn, int ninst, int populate, int s1, int s2, int a, int t);
+// get neoncache index for a x87 reg
+//int x87_get_neoncache(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a);
+// get vfpu register for a x87 reg, create the entry if needed
+//int x87_get_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int t);
+// get vfpu register for a x87 reg, create the entry if needed. Do not fetch the Stx if not already in cache
+//int x87_get_st_empty(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int t);
+// refresh a value from the cache ->emu (nothing done if value is not cached)
+//void x87_refresh(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st);
+// refresh a value from the cache ->emu and then forget the cache (nothing done if value is not cached)
+//void x87_forget(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st);
+// refresh the cache value from emu
+//void x87_reget_st(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int st);
+// swap 2 x87 regs
+//void x87_swapreg(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int a, int b);
+// Set rounding according to cw flags, return reg to restore flags
+//int x87_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3);
+// Restore round flag
+//void x87_restoreround(dynarec_rv64_t* dyn, int ninst, int s1);
+// Set rounding according to mxcsr flags, return reg to restore flags
+//int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3);
+
+//void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3);
+
+void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val);
+void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val);
+
+#if STEP < 2
+#define CHECK_CACHE()   0
+#else
+#define CHECK_CACHE()   (cacheupd = CacheNeedsTransform(dyn, ninst))
+#endif
+
+// common coproc helpers
+// reset the cache
+void fpu_reset(dynarec_rv64_t* dyn);
+// reset the cache with n
+void fpu_reset_cache(dynarec_rv64_t* dyn, int ninst, int reset_n);
+// propagate stack state
+void fpu_propagate_stack(dynarec_rv64_t* dyn, int ninst);
+// purge the FPU cache (needs 3 scratch registers)
+void fpu_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, int s3);
+// purge MMX cache
+void mmx_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1);
+// purge x87 cache
+void x87_purgecache(dynarec_rv64_t* dyn, int ninst, int next, int s1, int s2, int s3);
+#ifdef HAVE_TRACE
+void fpu_reflectcache(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3);
+#endif
+void fpu_pushcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07);
+void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07);
+
+uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int seg, int* ok, int* need_epilog);
+//uintptr_t dynarec64_65(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep,int* ok, int* need_epilog);
+//uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_D8(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_DA(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_DC(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_DE(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_6664(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int seg, int* ok, int* need_epilog);
+//uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int rep, int* ok, int* need_epilog);
+//uintptr_t dynarec64_F20F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
+//uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ninst, rex_t rex, int* ok, int* need_epilog);
+
+#if STEP < 2
+#define PASS2(A)
+#else
+#define PASS2(A)   A
+#endif
+
+#if STEP < 3
+#define PASS3(A)
+#else
+#define PASS3(A)   A
+#endif
+
+#if STEP < 3
+#define MAYUSE(A)   (void)A
+#else
+#define MAYUSE(A)   
+#endif
+
+#endif //__DYNAREC_RV64_HELPER_H__
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_jmpnext.c b/src/dynarec/rv64/dynarec_rv64_jmpnext.c
new file mode 100644
index 00000000..5f58183f
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_jmpnext.c
@@ -0,0 +1,13 @@
+#include <stdint.h>
+
+#include "rv64_emitter.h"
+
+#define EMIT(A) *block = (A); ++block
+void CreateJmpNext(void* addr, void* next)  // emit at addr: load in x2 the 64bits value stored at next (PC-relative), then jump to it
+{
+    uint32_t* block = (uint32_t*)addr;
+    uintptr_t diff = (intptr_t)next - (intptr_t)addr;
+    AUIPC(x2, (diff+0x800)>>12);    // +0x800: LD sign-extends its 12bits offset, so round the upper 20bits to compensate
+    LD(x2, x2, diff&0b111111111111);
+    BR(x2);
+}
\ No newline at end of file
diff --git a/src/dynarec/rv64/dynarec_rv64_pass0.h b/src/dynarec/rv64/dynarec_rv64_pass0.h
new file mode 100644
index 00000000..fd0f617f
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_pass0.h
@@ -0,0 +1,50 @@
+
+#define INIT    uintptr_t sav_addr=addr
+#define FINI    \
+    dyn->isize = addr-sav_addr;         \
+    dyn->insts[ninst].x64.addr = addr;  \
+    if(ninst) dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr
+
+#define MESSAGE(A, ...)  
+#define MAYSETFLAGS()   dyn->insts[ninst].x64.may_set = 1
+#define READFLAGS(A)    \
+        dyn->insts[ninst].x64.use_flags = A; dyn->f.dfnone = 1;\
+        dyn->f.pending=SF_SET
+#define SETFLAGS(A,B)   \
+        dyn->insts[ninst].x64.set_flags = A;    \
+        dyn->insts[ninst].x64.state_flags = B;  \
+        dyn->f.pending=(B)&SF_SET_PENDING;      \
+        dyn->f.dfnone=((B)&SF_SET)?1:0;
+#define EMIT(A)     
+#define JUMP(A, C)         add_next(dyn, (uintptr_t)A); dyn->insts[ninst].x64.jmp = A; dyn->insts[ninst].x64.jmp_cond = C
+#define BARRIER(A)      if(A!=BARRIER_MAYBE) {fpu_purgecache(dyn, ninst, 0, x1, x2, x3); dyn->insts[ninst].x64.barrier = A;} else dyn->insts[ninst].barrier_maybe = 1
+#define BARRIER_NEXT(A) dyn->insts[ninst+1].x64.barrier = A
+#define NEW_INST \
+        ++dyn->size;                            \
+        if(dyn->size+3>=dyn->cap) {             \
+                dyn->insts = (instruction_native_t*)customRealloc(dyn->insts, sizeof(instruction_native_t)*dyn->cap*2);\
+                memset(&dyn->insts[dyn->cap], 0, sizeof(instruction_native_t)*dyn->cap);   \
+                dyn->cap *= 2;                  \
+        }                                       \
+        dyn->insts[ninst].x64.addr = ip;        \
+        dyn->insts[ninst].f_entry = dyn->f;     \
+        if(ninst) {dyn->insts[ninst-1].x64.size = dyn->insts[ninst].x64.addr - dyn->insts[ninst-1].x64.addr;}
+
+#define INST_EPILOG                             \
+        dyn->insts[ninst].f_exit = dyn->f;      \
+        dyn->insts[ninst].x64.has_next = (ok>0)?1:0;
+#define INST_NAME(name) 
+#define DEFAULT                         \
+        --dyn->size;                    \
+        *ok = -1;                       \
+        if(box64_dynarec_log>=LOG_INFO) {\
+        dynarec_log(LOG_NONE, "%p: Dynarec stopped because of Opcode %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X %02X", \
+        (void*)ip, PKip(0),             \
+        PKip(1), PKip(2), PKip(3),      \
+        PKip(4), PKip(5), PKip(6),      \
+        PKip(7), PKip(8), PKip(9),      \
+        PKip(10),PKip(11),PKip(12),     \
+        PKip(13),PKip(14));             \
+        printFunctionAddr(ip, " => ");  \
+        dynarec_log(LOG_NONE, "\n");    \
+        }
diff --git a/src/dynarec/rv64/dynarec_rv64_pass1.h b/src/dynarec/rv64/dynarec_rv64_pass1.h
new file mode 100644
index 00000000..9e80d7b0
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_pass1.h
@@ -0,0 +1,11 @@
+#define INIT    
+#define FINI
+#define MESSAGE(A, ...)  
+#define EMIT(A)     
+#define NEW_INST                                \
+        dyn->insts[ninst].f_entry = dyn->f;     \
+
+#define INST_EPILOG                             \
+        dyn->insts[ninst].f_exit = dyn->f
+
+#define INST_NAME(name)  
diff --git a/src/dynarec/rv64/dynarec_rv64_pass2.h b/src/dynarec/rv64/dynarec_rv64_pass2.h
new file mode 100644
index 00000000..955011d4
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_pass2.h
@@ -0,0 +1,19 @@
+#define INIT        dyn->native_size = 0
+#define FINI                                                                                            \
+        if(ninst) {                                                                                     \
+                dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);     \
+                dyn->insts_size += 1+((dyn->insts[ninst].x64.size>dyn->insts[ninst].size)?dyn->insts[ninst].x64.size:dyn->insts[ninst].size)/15; \
+        }
+
+#define MESSAGE(A, ...)  
+#define EMIT(A)     dyn->insts[ninst].size+=4; dyn->native_size+=4
+#define NEW_INST                                                                                        \
+        if(ninst) {                                                                                     \
+                dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);     \
+                if(isInstClean(dyn, ninst) && dyn->last_ip!=ip)                                         \
+                        dyn->last_ip = 0;                                                               \
+                dyn->insts_size += 1+((dyn->insts[ninst-1].x64.size>dyn->insts[ninst-1].size)?dyn->insts[ninst-1].x64.size:dyn->insts[ninst-1].size)/15; \
+        }
+#define INST_EPILOG dyn->insts[ninst].epilog = dyn->native_size; 
+#define INST_NAME(name) 
+#define TABLE64(A, V)   {Table64(dyn, (V)); EMIT(0); EMIT(0);}
diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h
new file mode 100644
index 00000000..b9356530
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_pass3.h
@@ -0,0 +1,53 @@
+#define INIT    
+#define FINI        \
+    if(ninst)       \
+        addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst].x64.size, dyn->insts[ninst].size/4); \
+    addInst(dyn->instsize, &dyn->insts_size, 0, 0);
+#define EMIT(A)     \
+    if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), rv64_print(A, (uintptr_t)dyn->block));} \
+    *(uint32_t*)(dyn->block) = (uint32_t)(A);       \
+    dyn->block += 4; dyn->native_size += 4;         \
+    dyn->insts[ninst].size2 += 4
+
+#define MESSAGE(A, ...)  if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
+#define NEW_INST        \
+    if(ninst && isInstClean(dyn, ninst)) {                      \
+        if(dyn->last_ip!=ip) dyn->last_ip = 0;                  \
+    }                                                           \
+    if(ninst)                                                   \
+        addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst-1].x64.size, dyn->insts[ninst-1].size/4);
+#define INST_EPILOG     
+#define INST_NAME(name) \
+    if(box64_dynarec_dump) {\
+        printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name); \
+        dynarec_log(LOG_NONE, "%s%p: %d emited opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d", \
+            (box64_dynarec_dump>1)?"\e[32m":"", \
+            (void*)(dyn->native_start+dyn->insts[ninst].address),  \
+            dyn->insts[ninst].size/4,           \
+            ninst,                              \
+            dyn->insts[ninst].x64.barrier,      \
+            dyn->insts[ninst].x64.state_flags,  \
+            dyn->f.pending,                     \
+            dyn->f.dfnone,                      \
+            dyn->insts[ninst].x64.may_set?"may":"set",              \
+            dyn->insts[ninst].x64.set_flags,    \
+            dyn->insts[ninst].x64.gen_flags,    \
+            dyn->insts[ninst].x64.use_flags,    \
+            dyn->insts[ninst].x64.need_before,  \
+            dyn->insts[ninst].x64.need_after,   \
+            dyn->smread, dyn->smwrite);         \
+        if(dyn->insts[ninst].pred_sz) {         \
+            dynarec_log(LOG_NONE, ", pred=");   \
+            for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)\
+                dynarec_log(LOG_NONE, "%s%d", ii?"/":"", dyn->insts[ninst].pred[ii]);\
+        }                                       \
+        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts>=0)\
+            dynarec_log(LOG_NONE, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts);\
+        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts==-1)\
+            dynarec_log(LOG_NONE, ", jmp=out"); \
+        if(dyn->last_ip)                        \
+            dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip);\
+        dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":"");                       \
+    }
+
+#define TABLE64(A, V)   {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); AUIPC(A, ((val64offset+0x800)>>12)); LD(A, A, (val64offset&0b111111111111));} // +0x800 compensates the sign-extension of the LD 12bits offset
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
new file mode 100644
index 00000000..7b8fdec2
--- /dev/null
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -0,0 +1,80 @@
+#ifndef __DYNAREC_RV64_PRIVATE_H_
+#define __DYNAREC_RV64_PRIVATE_H_
+#include <stdint.h>
+
+#include "../dynarec_private.h"
+
+typedef struct x64emu_s x64emu_t;
+typedef struct dynablock_s dynablock_t;
+typedef struct instsize_s instsize_t;
+
+#define BARRIER_MAYBE   8
+
+typedef struct flagcache_s {
+    int                 pending;    // is there a pending flags here, or to check?
+    int                 dfnone;     // if defered flags is already set to df_none
+} flagcache_t;
+
+typedef struct instruction_rv64_s {
+    instruction_x64_t   x64;
+    uintptr_t           address;    // (start) address of the rv64 emitted instruction
+    uintptr_t           epilog;     // epilog of current instruction (can be start of next, or barrier stuff)
+    int                 size;       // size of the rv64 emitted instruction
+    int                 size2;      // size of the rv64 emitted instruction, as accumulated by the pass3 EMIT
+    int                 pred_sz;    // size of predecessor list
+    int                 *pred;      // predecessor array
+    uintptr_t           mark, mark2, mark3;
+    uintptr_t           markf;
+    uintptr_t           markseg;
+    uintptr_t           marklock;
+    int                 pass2choice;// value for choices that are fixed on pass2 for pass3
+    uintptr_t           natcall;
+    int                 retn;
+    int                 barrier_maybe;
+    flagcache_t         f_exit;     // flags status at end of instruction
+    flagcache_t         f_entry;    // flags status before the instruction begins
+} instruction_rv64_t;
+
+typedef struct dynarec_rv64_s {
+    instruction_rv64_t* insts;
+    int32_t             size;
+    int32_t             cap;
+    uintptr_t           start;      // start of the block
+    uint32_t            isize;      // size in byte of x64 instructions included
+    void*               block;      // memory pointer where next instruction is emitted
+    uintptr_t           native_start;  // start of the rv64 code
+    size_t              native_size;   // size of emitted rv64 code
+    uintptr_t           last_ip;    // last set IP in RIP (or NULL if unclean state) TODO: move to a cache something
+    uint64_t*           table64;   // table of 64bits values
+    int                 table64size;// size of table (will be appended at end of executable code)
+    int                 table64cap;
+    uintptr_t           tablestart;
+    flagcache_t         f;
+    uintptr_t*          next;       // variable array of "next" jump address
+    int                 next_sz;
+    int                 next_cap;
+    int*                predecessor;// single array of all predecessor
+    dynablock_t*        dynablock;
+    instsize_t*         instsize;
+    size_t              insts_size; // size of the instruction size array (calculated)
+    uint8_t             smread;    // for strongmem model emulation
+    uint8_t             smwrite;    // for strongmem model emulation
+    uintptr_t           forward;    // address of the last end of code while testing forward
+    uintptr_t           forward_to; // address of the next jump to (to check if everything is ok)
+    int32_t             forward_size;   // size at the forward point
+    int                 forward_ninst;  // ninst at the forward point
+} dynarec_rv64_t;
+
+void add_next(dynarec_rv64_t *dyn, uintptr_t addr);
+uintptr_t get_closest_next(dynarec_rv64_t *dyn, uintptr_t addr);
+int is_nops(dynarec_rv64_t *dyn, uintptr_t addr, int n);
+int is_instructions(dynarec_rv64_t *dyn, uintptr_t addr, int n);
+
+int Table64(dynarec_rv64_t *dyn, uint64_t val);  // add a value to table64 (if needed) and gives back the offset that TABLE64 splits between AUIPC and LD
+
+void CreateJmpNext(void* addr, void* next);
+
+//TODO: GO_TRACE() !
+#define GO_TRACE()
+
+#endif //__DYNAREC_RV64_PRIVATE_H_
\ No newline at end of file
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
new file mode 100644
index 00000000..6810a484
--- /dev/null
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -0,0 +1,223 @@
+#ifndef __RV64_EMITTER_H__
+#define __RV64_EMITTER_H__
+/*
+    RV64 Emitter
+
+*/
+
+// RV64 ABI
+/*
+reg     name    description                     saver
+------------------------------------------------------
+x0      zero    Hard-wired zero                 —
+x1      ra      Return address                  Caller
+x2      sp      Stack pointer                   Callee
+x3      gp      Global pointer                  —
+x4      tp      Thread pointer                  —
+x5–7    t0–2    Temporaries                     Caller
+x8      s0/fp   Saved register/frame pointer    Callee
+x9      s1      Saved register                  Callee
+x10–11  a0–1    Function arguments/return val.  Caller
+x12–17  a2–7    Function arguments              Caller
+x18–27  s2–11   Saved registers                 Callee
+x28–31  t3–6    Temporaries                     Caller
+-------------------------------------------------------
+f0–7    ft0–7   FP temporaries                  Caller
+f8–9    fs0–1   FP saved registers              Callee
+f10–11  fa0–1   FP arguments/return values      Caller
+f12–17  fa2–7   FP arguments                    Caller
+f18–27  fs2–11  FP saved registers              Callee
+f28–31  ft8–11  FP temporaries                  Caller
+*/
+// x86 Register mapping
+#define xRAX    16
+#define xRCX    17
+#define xRDX    18
+#define xRBX    19
+#define xRSP    20
+#define xRBP    21
+#define xRSI    22
+#define xRDI    23
+#define xR8     24
+#define xR9     25
+#define xR10    26
+#define xR11    27
+#define xR12    28
+#define xR13    29
+#define xR14    30
+#define xR15    31
+#define xFlags  5
+#define xRIP    6
+
+// 32bits version
+#define wEAX    xRAX
+#define wECX    xRCX
+#define wEDX    xRDX
+#define wEBX    xRBX
+#define wESP    xRSP
+#define wEBP    xRBP
+#define wESI    xRSI
+#define wEDI    xRDI
+#define wR8     xR8
+#define wR9     xR9
+#define wR10    xR10
+#define wR11    xR11
+#define wR12    xR12
+#define wR13    xR13
+#define wR14    xR14
+#define wR15    xR15
+#define wFlags  xFlags
+// scratch registers
+#define x1      11
+#define x2      12
+#define x3      13
+#define x4      14
+#define x5      15
+// used to clear the upper 32bits
+#define xMASK   7
+// 32bits version of scratch
+#define w1      x1
+#define w2      x2
+#define w3      x3
+#define w4      x4
+#define w5      x5
+#define w6      x6
+// emu is r10
+#define xEmu    10
+// RV64 RA
+#define xRA     1
+#define xSP     2
+// RV64 args
+#define A0      10
+#define A1      11
+// xZR reg is 0
+#define xZR     0
+#define wZR     xZR
+
+// MOVE64x is quite complex, so use a function for this
+#define MOV64x(A, B)    rv64_move64(dyn, ninst, A, B)
+
+// ZERO the upper part
+#define ZEROUP(r)       AND(r, r, xMASK)
+
+#define R_type(funct7, rs2, rs1, funct3, rd, opcode)    ((funct7)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode))
+#define I_type(imm12, rs1, funct3, rd, opcode)    ((imm12)<<20 | (rs1)<<15 | (funct3)<<12 | (rd)<<7 | (opcode))
+#define S_type(imm12, rs2, rs1, funct3, opcode)    (((imm12)>>5)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | ((imm12)&31)<<7 | (opcode))
+#define B_type(imm13, rs2, rs1, funct3, opcode)      ((((imm13)>>12)&1)<<31 | (((imm13)>>5)&63)<<25 | (rs2)<<20 | (rs1)<<15 | (funct3)<<12 | (((imm13)>>1)&15)<<8 | (((imm13)>>11)&1)<<7 | (opcode)) // branch offset scattered as imm[12|10:5] and imm[4:1|11]
+#define U_type(imm32, rd, opcode)   (((imm32)>>12)<<12 | (rd)<<7 | (opcode))
+#define J_type(imm21, rd, opcode)    ((((imm21)>>20)&1)<<31 | (((imm21)>>1)&0b1111111111)<<21 | (((imm21)>>11)&1)<<20 | (((imm21)>>12)&0b11111111)<<12 | (rd)<<7 | (opcode))
+
+// RV32I
+// put imm20 in the [31:12] bits of rd, zero [11:0] and sign extend bits31
+#define LUI(rd, imm20)                 EMIT(U_type((imm20)<<12, rd, 0b0110111))
+// put PC+(imm20<<12) in rd (imm20 is the upper 20bits, placed in [31:12] like LUI)
+#define AUIPC(rd, imm20)               EMIT(U_type((imm20)<<12, rd, 0b0010111))
+
+#define JAL_gen(rd, imm21)             J_type(imm21, rd, 0b1101111)
+// Unconditionnal branch, no return address set
+#define B(imm21)                       EMIT(JAL_gen(xZR, imm21))
+// Uncondiftionnal branch, return set to xRA
+#define JAL(imm21)                     EMIT(JAL_gen(xRA, imm21))
+
+#define JALR_gen(rd, rs1, imm12)       I_type(imm12, rs1, 0b000, rd, 0b1100111)
+// Unconditionnal branch to r, no return address set
+#define BR(r)                          EMIT(JALR_gen(xZR, r, 0))
+// Unconditionnal branch to r+i12, no return address set
+#define BR_I12(r, imm12)               EMIT(JALR_gen(xZR, r, (imm12)&0b111111111111))
+// Unconditionnal branch to r, return address set to xRA
+#define JALR(r)                        EMIT(JALR_gen(xRA, r, 0))
+// Unconditionnal branch to r+i12, return address set to xRA
+#define JALR_I12(r, imm12)             EMIT(JALR_gen(xRA, r, (imm12)&0b111111111111))
+
+// rd = rs1 + imm12
+#define ADDI(rd, rs1, imm12)        EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, 0b0010011))
+// rd = rs1 - imm12 (pseudo instruction)
+#define SUBI(rd, rs1, imm12)        EMIT(I_type((-(imm12))&0b111111111111, rs1, 0b000, rd, 0b0010011))
+// rd = (rs1<imm12)?1:0
+#define SLTI(rd, rs1, imm12)        EMIT(I_type((imm12)&0b111111111111, rs1, 0b010, rd, 0b0010011))
+// rd = (rs1<imm12)?1:0 unsigned
+#define SLTIU(rd, rs1, imm12)       EMIT(I_type((imm12)&0b111111111111, rs1, 0b011, rd, 0b0010011))
+// rd = rs1 ^ imm12
+#define XORI(rd, rs1, imm12)        EMIT(I_type((imm12)&0b111111111111, rs1, 0b100, rd, 0b0010011))
+// rd = rs1 | imm12
+#define ORI(rd, rs1, imm12)         EMIT(I_type((imm12)&0b111111111111, rs1, 0b110, rd, 0b0010011))
+// rd = rs1 & imm12 (funct3 is 0b111; 0b101 is the shift-right immediate group)
+#define ANDI(rd, rs1, imm12)        EMIT(I_type((imm12)&0b111111111111, rs1, 0b111, rd, 0b0010011))
+
+// rd = imm12
+#define MOV_U12(rd, imm12)          ADDI(rd, xZR, imm12)
+// nop
+#define NOP()                       ADDI(xZR, xZR, 0)
+
+// rd = rs1 + rs2
+#define ADD(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0110011))
+// rd = rs1 - rs2
+#define SUB(rd, rs1, rs2)           EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0110011))
+// rd = rs1<<rs2
+#define SLL(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0110011))
+// rd = (rs1<rs2)?1:0
+#define SLT(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b010, rd, 0b0110011))
+// rd = (rs1<rs2)?1:0 Unsigned
+#define SLTU(rd, rs1, rs2)          EMIT(R_type(0b0000000, rs2, rs1, 0b011, rd, 0b0110011))
+// rd = rs1 ^ rs2
+#define XOR(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b100, rd, 0b0110011))
+// rd = rs1>>rs2 logical
+#define SRL(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b101, rd, 0b0110011))
+// rd = rs1>>rs2 aritmetic
+#define SRA(rd, rs1, rs2)           EMIT(R_type(0b0100000, rs2, rs1, 0b101, rd, 0b0110011))
+// rd = rs1 | rs2
+#define OR(rd, rs1, rs2)            EMIT(R_type(0b0000000, rs2, rs1, 0b110, rd, 0b0110011))
+// rd = rs1 & rs2
+#define AND(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b111, rd, 0b0110011))
+
+// rd = rs1 (pseudo instruction)
+#define MV(rd, rs1)                 ADDI(rd, rs1, 0)
+// rd = rs1 (pseudo instruction)
+#define MVxw(rd, rs1)               if(rex.w) {MV(rd, rs1); } else {AND(rd, rs1, xMASK);}
+// rd = !rs1
+#define NOT(rd, rs1)                XORI(rd, rs1, -1)
+// rd = -rs1
+#define NEG(rd, rs1)                SUB(rd, xZR, rs1)
+
+// rd = 4-bytes[rs1+imm12] signed extended
+#define LW(rd, rs1, imm12)          EMIT(I_type(imm12, rs1, 0b010, rd, 0b0000011))
+// rd = 2-bytes[rs1+imm12] signed extended
+#define LH(rd, rs1, imm12)          EMIT(I_type(imm12, rs1, 0b001, rd, 0b0000011))
+// rd = byte[rs1+imm12] signed extended
+#define LB(rd, rs1, imm12)          EMIT(I_type(imm12, rs1, 0b000, rd, 0b0000011))
+// rd = 2-bytes[rs1+imm12] zero extended
+#define LHU(rd, rs1, imm12)         EMIT(I_type(imm12, rs1, 0b101, rd, 0b0000011))
+// rd = byte[rs1+imm12] zero extended
+#define LBU(rd, rs1, imm12)         EMIT(I_type(imm12, rs1, 0b100, rd, 0b0000011))
+// byte[rs1+imm12] = rs2
+#define SB(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b000, 0b0100011))
+// 2-bytes[rs1+imm12] = rs2
+#define SH(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b001, 0b0100011))
+// 4-bytes[rs1+imm12] = rs2
+#define SW(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b010, 0b0100011))
+
+#define FENCE_gen(pred, succ)       (((pred)<<24) | ((succ)<<20) | 0b0001111)
+#define FENCE()                     EMIT(FENCE_gen(3, 3))
+
+#define FENCE_I_gen()               ((0b001<<12) | 0b0001111)
+#define FENCE_I()                   EMIT(FENCE_I_gen())
+
+// RV64I
+#define LWU(rd, rs1, imm12)         EMIT(I_type(imm12, rs1, 0b110, rd, 0b0000011))
+
+// rd = [rs1 + imm12]
+#define LD(rd, rs1, imm12)          EMIT(I_type(imm12, rs1, 0b011, rd, 0b0000011))
+// [rs1 + imm12] = rs2
+#define SD(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b011, 0b0100011))
+// [rs1 + imm12] = rs2
+#define SDxw(rs2, rs1, imm12)       EMIT(S_type(imm12, rs2, rs1, 0b010+rex.w, 0b0100011))
+
+// Shift Left Immediate
+#define SLLI(rd, rs1, imm6)         EMIT(I_type(imm6, rs1, 0b001, rd, 0b0010011))
+// Shift Right Logical Immediate
+#define SRLI(rd, rs1, imm6)         EMIT(I_type(imm6, rs1, 0b101, rd, 0b0010011))
+// Shift Right Aritmetic Immediate
+#define SRAI(rd, rs1, imm6)         EMIT(I_type((imm6)|(0b010000<<6), rs1, 0b101, rd, 0b0010011))
+
+
+#endif //__RV64_EMITTER_H__ 
\ No newline at end of file
diff --git a/src/dynarec/rv64/rv64_epilog.S b/src/dynarec/rv64/rv64_epilog.S
new file mode 100644
index 00000000..610a2483
--- /dev/null
+++ b/src/dynarec/rv64/rv64_epilog.S
@@ -0,0 +1,61 @@
+//riscv epilog for dynarec
+//Write the x86 registers back to emu, then restore the saved registers and the stack
+//expects the pointer to emu in a0
+//and returns to the address restored in ra
+
+.text
+.align 4
+
+.global rv64_epilog
+rv64_epilog:
+    //update register -> emu (x16..x31 hold the 16 x86 general registers)
+    sd      x16, (a0)
+    sd      x17, 8(a0)
+    sd      x18, 16(a0)
+    sd      x19, 24(a0)
+    sd      x20, 32(a0)
+    sd      x21, 40(a0)
+    sd      x22, 48(a0)
+    sd      x23, 56(a0)
+    sd      x24, 64(a0)
+    sd      x25, 72(a0)
+    sd      x26, 80(a0)
+    sd      x27, 88(a0)
+    sd      x28, 96(a0)
+    sd      x29, 104(a0)
+    sd      x30, 112(a0)
+    sd      x31, 120(a0)
+    sd      x5, 128(a0)     //xFlags
+    sd      x6, 136(a0)     // put back reg value in emu, including RIP (x6 is xRIP)
+    //restore all used register
+    ld      ra, (sp)  // restore ra
+    ld      x8, 8(sp) // restore fp
+    ld      x18, 16(sp)
+    ld      x19, 24(sp)
+    ld      x20, 32(sp)
+    ld      x21, 40(sp)
+    ld      x22, 48(sp)
+    ld      x23, 56(sp)
+    ld      x24, 64(sp)
+    ld      x25, 72(sp)
+    ld      x26, 80(sp)
+    ld      x27, 88(sp)
+    fld     f8, 96(sp)      // reload (not store) the callee-saved FP registers
+    fld     f9, 104(sp)
+    addi    sp,  sp, (8 * 14)
+    //end, return...
+    ret
+
+
+.global rv64_epilog_fast
+rv64_epilog_fast:
+    //restore all used register
+    ld      ra, (sp)  // restore ra
+    ld      x8, 8(sp) // restore fp
+    ld      x16, 16(sp)
+    ld      x17, 24(sp)
+    fld     f8, 32(sp)
+    fld     f9, 40(sp)
+    addi    sp,  sp, (8 * (2+4))    // NOTE(review): rv64_epilog pops 8*14 and reloads x18-x27; confirm which layout matches the prolog
+    //end, return...
+    ret
diff --git a/src/dynarec/rv64/rv64_lock.S b/src/dynarec/rv64/rv64_lock.S
new file mode 100644
index 00000000..c1d3138d
--- /dev/null
+++ b/src/dynarec/rv64/rv64_lock.S
@@ -0,0 +1,173 @@
+// RV64 lock helper
+// there is 2 part: read and write
+// write return 0 on success, 1 on fail (value has been changed)
+
+.text
+.align 4
+
+.global rv64_lock_xchg
+.global rv64_lock_xchg_d
+.global rv64_lock_storeifnull
+.global rv64_lock_storeifnull_d
+.global rv64_lock_storeifref
+.global rv64_lock_storeifref_d
+.global rv64_lock_storeifref2_d
+.global rv64_lock_decifnot0b
+.global rv64_lock_storeb
+.global rv64_lock_incif0
+.global rv64_lock_decifnot0
+.global rv64_lock_store
+.global rv64_lock_cas_d
+.global rv64_lock_cas_dd
+.global rv64_lock_cas_dq
+
+rv64_lock_xchg:
+    // address is a0, value is a1, return old value in a0
+    amoswap.d.aqrl  a0, a1, (a0)
+    ret
+
+rv64_lock_xchg_d:
+    // address is a0, value is a1, return old value in a0
+    amoswap.w.aqrl  a0, a1, (a0)
+    ret
+
+rv64_lock_storeifnull:
+    // address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value
+    fence   rw, rw
+1:    
+    lr.d    a2, (a0)
+    bnez    a2, 2f
+    sc.d    a3, a1, (a0)
+    bnez    a3, 1b
+2:
+    mv      a0, a2
+    ret
+
+rv64_lock_storeifnull_d:
+    // address is a0, value is a1, a1 store to a0 only if [a0] is 0. return old [a0] value
+    fence   rw, rw
+1:    
+    lr.w    a2, (a0)
+    bnez    a2, 2f
+    sc.w    a3, a1, (a0)
+    bnez    a3, 1b
+2:
+    mv      a0, a2
+    ret
+
+rv64_lock_storeifref:
+    // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value)
+    fence   rw, rw
+1:    
+    lr.d    a3, (a0)
+    bne     a2, a3, 2f
+    sc.d    a4, a1, (a0)
+    bnez    a4, 1b
+    fence   rw, rw
+    mv      a0, a1
+    ret
+2:
+    fence   rw, rw
+    mv      a0, a3
+    ret
+
+rv64_lock_storeifref_d:
+    // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return new [a0] value (so a1 or old value)
+    fence   rw, rw
+1:    
+    lr.w    a3, (a0)
+    bne     a2, a3, 2f
+    sc.w    a4, a1, (a0)
+    bnez    a4, 1b
+    mv      a0, a1
+    ret
+2:
+    mv      a0, a3
+    ret
+
+rv64_lock_storeifref2_d:
+    // address is a0, value is a1, a1 store to a0 only if [a0] is a2. return old [a0] value
+    fence   rw, rw
+1:    
+    lr.w    a3, (a0)
+    bne     a2, a3, 2f
+    sc.w    a4, a1, (a0)
+    bnez    a4, 1b
+2:
+    mv      a0, a3
+    ret
+
+rv64_lock_decifnot0b:
+    fence   rw, rw          // address is a0; NOTE(review): lr.w/sc.w presumably need a0 to be 4-bytes aligned, confirm with callers
+1:
+    lr.w    a1, (a0)
+    andi    a3, a1, 0xff    // test only the low byte, keep the full word in a1
+    beqz    a3, 2f
+    addi    a1, a1, -1      // low byte is not 0, so no borrow reaches the upper bytes
+    sc.w    a2, a1, (a0)    // write back the whole word, upper 3 bytes unchanged
+    bnez    a2, 1b
+2:
+    ret
+
+rv64_lock_storeb:
+    sb      a1, 0(a0)
+    fence   rw, rw
+    ret
+
+rv64_lock_decifnot0:
+    fence   rw, rw
+1:
+    lr.w    a1, (a0)
+    beqz    a1, 2f
+    addi    a1, a1, -1
+    sc.w    a2, a1, (a0)
+    bnez    a2, 1b
+2:
+    mv      a0, a1
+    ret
+
+rv64_lock_incif0:
+    fence   rw, rw          // address is a0, return the old [a0] value in a0
+1:
+    lr.w    a1, (a0)
+    bnez    a1, 2f
+    addi    a3, a1, 1       // build the new value in a3 so a1 keeps the old one
+    sc.w    a2, a3, (a0)
+    bnez    a2, 1b
+2:
+    mv      a0, a1
+    ret
+
+rv64_lock_store:
+    sw      a1, 0(a0)
+    fence   rw, rw
+    ret
+
+rv64_lock_cas_d:
+    lr.w t0, (a0)
+    bne t0, a1, 1f
+    sc.w a0, a2, (a0)
+    ret
+1:
+    li a0, 1
+    ret
+
+rv64_lock_cas_dd:
+    lr.d t0, (a0)
+    bne t0, a1, 1f
+    sc.d a0, a2, (a0)
+    ret
+1:
+    li a0, 1
+    ret
+
+rv64_lock_cas_dq:
+    mv a4, a0               // keep the address, a0 is overwritten by the sc.d status
+    lr.d t0, (a0)
+    bne t0, a1, 1f
+    sc.d a0, a2, (a0)       // a0 = 0 if val1 (a2) was stored
+    sd a3, 8(a4)            // val2 goes in the next 64bits slot, not over val1. NOTE(review): still stored when sc.d failed
+    ret
+1:
+    li a0, 1
+    ret
\ No newline at end of file
diff --git a/src/dynarec/rv64/rv64_lock.h b/src/dynarec/rv64/rv64_lock.h
new file mode 100644
index 00000000..71600e57
--- /dev/null
+++ b/src/dynarec/rv64/rv64_lock.h
@@ -0,0 +1,60 @@
+#ifndef __RV64_LOCK__H__
+#define __RV64_LOCK__H__
+#include <stdint.h>
+
+// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
+extern int rv64_lock_cas_d(void* p, int32_t ref, int32_t val);
+
+// Atomically store val at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
+extern int rv64_lock_cas_dd(void* p, int64_t ref, int64_t val);
+
+// Atomically exchange value at [p] with val, return old [p] value
+extern uintptr_t rv64_lock_xchg(void* p, uintptr_t val);
+
+// Atomically exchange value at [p] with val, return old [p] value
+extern uint32_t rv64_lock_xchg_d(void* p, uint32_t val);
+
+// Atomically store value to [p] only if [p] is NULL. Return old [p] value
+extern uint32_t rv64_lock_storeifnull_d(void*p, uint32_t val);
+
+// Atomically store value to [p] only if [p] is NULL. Return old [p] value
+extern void* rv64_lock_storeifnull(void*p, void* val);
+
+// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
+extern void* rv64_lock_storeifref(void*p, void* val, void* ref);
+
+// Atomically store value to [p] only if [p] is ref. Return new [p] value (so val or old)
+extern uint32_t rv64_lock_storeifref_d(void*p, uint32_t val, uint32_t ref);
+
+// Atomically store value to [p] only if [p] is ref. Return old [p] value
+extern uint32_t rv64_lock_storeifref2_d(void*p, uint32_t val, uint32_t ref);
+
+// Atomically decrement the byte at [p] (but only if [p] is not 0)
+extern void rv64_lock_decifnot0b(void*p);
+
+// Store the byte b at [p], followed by a memory barrier
+extern void rv64_lock_storeb(void*p, uint8_t b);
+
+// Atomically increment the int at [p] only if it was 0. Return the old value of [p]
+extern int rv64_lock_incif0(void*p);
+
+// Atomically decrement the int at [p] (but only if [p] is not 0)
+extern int rv64_lock_decifnot0(void*p);
+
+// Store the 32-bit value v at [p], followed by a memory barrier
+extern void rv64_lock_store(void*p, uint32_t v);
+
+// (mostly) Atomically store val1 and val2 at [p] if old [p] is ref. Return 0 if OK, 1 if not. p needs to be aligned
+extern int rv64_lock_cas_dq(void* p, uint64_t ref, uint64_t val1, uint64_t val2);
+
+// Not defined in assembler but in dynarec_rv64_functions
+uint8_t extract_byte(uint32_t val, void* address);
+uint32_t insert_byte(uint32_t val, uint8_t b, void* address);
+uint16_t extract_half(uint32_t val, void* address);
+uint32_t insert_half(uint32_t val, uint16_t h, void* address);
+
+uint8_t rv64_lock_xchg_b(void* addr, uint8_t v);
+extern int rv64_lock_cas_b(void* p, uint8_t ref, uint8_t val);
+extern int rv64_lock_cas_h(void* p, uint16_t ref, uint16_t val);
+
+#endif  //__RV64_LOCK__H__
diff --git a/src/dynarec/rv64/rv64_next.S b/src/dynarec/rv64/rv64_next.S
new file mode 100644
index 00000000..d9cab836
--- /dev/null
+++ b/src/dynarec/rv64/rv64_next.S
@@ -0,0 +1,54 @@
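+// RISC-V: update the linker table for the dynarec
+// called with a pointer to emu as 1st parameter (a0)
+// and an address as 2nd parameter (a1; the routine below calls it the IP address)
+// NOTE(review): the original note "ip is at r12" looks carried over from another port -- confirm which register holds ip here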
+
+.text
+.align 4
+
+.extern LinkNext
+
+.global rv64_next
+
+    .8byte  0   // NULL pointer before rv64_next, for getDB
+rv64_next:
+    // Entry: a0 = emu, a1 = IP address, ra = address this routine was reached from.
+    // Spills ten registers, calls LinkNext, restores them, then jumps to the address LinkNext returned.
+    // emu is a0
+    // IP address is a1
+    addi    sp,  sp,  -(8 * 10)     // room for ten 8-byte slots
+    sd      a0, (sp)
+    sd      a1, 8(sp)
+    sd      x5, 16(sp)
+    sd      x6, 24(sp)              // this slot is the one handed to LinkNext through a3
+    sd      x16, 32(sp)
+    sd      x17, 40(sp)
+    sd      x28, 48(sp)
+    sd      x29, 56(sp)
+    sd      x30, 64(sp)
+    sd      x31, 72(sp)
+
+    mv      a2, ra      // "from" is in ra, so put in a2
+    addi    a3, sp, 24   // a3 is address to change rip (points at the saved x6 slot)
+    // call the function
+1:
+    auipc   a4, %pcrel_hi(LinkNext)
+    jalr    a4, %pcrel_lo(1b)
+    // preserve return value
+    mv      a3, a0
+    // pop regs (x6 is reloaded from the slot LinkNext was given, so it picks up any change made there)
+    ld      a0, (sp)
+    ld      a1, 8(sp)
+    ld      x5, 16(sp)
+    ld      x6, 24(sp)
+    ld      x16, 32(sp)
+    ld      x17, 40(sp)
+    ld      x28, 48(sp)
+    ld      x29, 56(sp)
+    ld      x30, 64(sp)
+    ld      x31, 72(sp)
+    addi    sp,  sp,  (8 * 10)
+    // setup xMASK: all ones shifted right by 32 leaves 0x00000000ffffffff in x7
+    xori    x7, x0, -1
+    srli    x7, x7, 32
+    // return offset is jump address
+    jr      a3
+
diff --git a/src/dynarec/rv64/rv64_printer.c b/src/dynarec/rv64/rv64_printer.c
new file mode 100644
index 00000000..ce767298
--- /dev/null
+++ b/src/dynarec/rv64/rv64_printer.c
@@ -0,0 +1,1368 @@
+#include <assert.h>
+#include <stddef.h>
+#include <string.h>
+#include <stdio.h>
+
+#include "rv64_printer.h"
+#include "debug.h"
+
+/* Decoded fields of one instruction, filled in by the insn_*_read helpers. */
+typedef struct {
+    int8_t rd;      /* destination register index */
+    int8_t rs1;     /* first source register index */
+    int8_t rs2;     /* second source register index */
+    int8_t rs3;     /* third source register index (set by insn_fprtype_read) */
+    int32_t imm;    /* decoded immediate */
+    uint16_t csr;   /* CSR number (set by insn_csrtype_read) */
+    char *name;     /* mnemonic string */
+    bool rvc;       /* set to true by the compressed-format decoders */
+    bool f;         /* when true, RN() picks names from fpnames instead of gpnames */
+} insn_t;
+
+/* Integer register names indexed by register number (x0..x31).
+ * Several entries carry a suffix (flags, rip, rax, ...); presumably the x86 state
+ * the dynarec keeps in that register -- confirm against the dynarec register mapping.
+ * The longest entry ("t0_flags") is 8 characters, which with the terminator fills the 9-byte slot. */
+static const char gpnames[32][9] = {
+    "zero", "ra",   "sp",   "gp",   "tp",   "t0_flags",   "t1_rip",   "t2",
+    "s0",   "s1",   "a0",   "a1",   "a2",   "a3",   "a4",   "a5",
+    "a6_rax",   "a7_rcx",   "s2_rdx",   "s3_rbx",   "s4_rsp",   "s5_rbp",   "s6_rsi",   "s7_rdi",
+    "s8_r8",   "s9_r9",   "s10_r10",  "s11_r11",  "t3_r12",   "t4_r13",   "t5_r14",   "t6_r15",
+};
+
+/* Floating-point register names indexed by register number (f0..f31). */
+static const char fpnames[32][5] = {
+    "ft0",  "ft1",  "ft2",  "ft3",  "ft4",  "ft5",  "ft6",  "ft7",
+    "fs0",  "fs1",  "fa0",  "fa1",  "fa2",  "fa3",  "fa4",  "fa5",
+    "fa6",  "fa7",  "fs2",  "fs3",  "fs4",  "fs5",  "fs6",  "fs7",
+    "fs8",  "fs9",  "fs10", "fs11", "ft8",  "ft9",  "ft10", "ft11",
+};
+
+/* Bits 1:0 of the encoding; rv64_print switches on it to pick the decoding path. */
+#define QUADRANT(data) (((data) >>  0) & 0x3 )
+
+/**
+ * Field extractors for the normal (32-bit) instruction types.
+ * Each macro shifts the field down to bit 0 and masks it to its width.
+*/
+#define OPCODE(data) (((data) >>  2) & 0x1f)  /* bits 6:2 */
+#define RD(data)     (((data) >>  7) & 0x1f)  /* bits 11:7 */
+#define RS1(data)    (((data) >> 15) & 0x1f)  /* bits 19:15 */
+#define RS2(data)    (((data) >> 20) & 0x1f)  /* bits 24:20 */
+#define RS3(data)    (((data) >> 27) & 0x1f)  /* bits 31:27 */
+#define FUNCT2(data) (((data) >> 25) & 0x3 )  /* bits 26:25 */
+#define FUNCT3(data) (((data) >> 12) & 0x7 )  /* bits 14:12 */
+#define FUNCT7(data) (((data) >> 25) & 0x7f)  /* bits 31:25 */
+#define IMM116(data) (((data) >> 26) & 0x3f)  /* bits 31:26 */
+
+/* Decode a U-type word: bits 31:12 are kept in place as the immediate, rd comes from bits 11:7. */
+static inline insn_t insn_utype_read(uint32_t data)
+{
+    insn_t decoded = { 0 };
+
+    decoded.rd = RD(data);
+    decoded.imm = (int32_t)data & 0xfffff000;
+    return decoded;
+}
+
+/* Decode an I-type word: 12-bit immediate from bits 31:20 (sign carried by the signed shift), rs1 and rd. */
+static inline insn_t insn_itype_read(uint32_t data)
+{
+    insn_t decoded = { 0 };
+
+    decoded.rd = RD(data);
+    decoded.rs1 = RS1(data);
+    decoded.imm = (int32_t)data >> 20;
+    return decoded;
+}
+
+/* Decode a J-type word: reassemble the scattered 21-bit offset (bit 0 is always 0) and extract rd. */
+static inline insn_t insn_jtype_read(uint32_t data)
+{
+    uint32_t off_20    = (data >> 31) & 0x1;    /* offset[20]    */
+    uint32_t off_19_12 = (data >> 12) & 0xff;   /* offset[19:12] */
+    uint32_t off_11    = (data >> 20) & 0x1;    /* offset[11]    */
+    uint32_t off_10_1  = (data >> 21) & 0x3ff;  /* offset[10:1]  */
+
+    int32_t offset = (off_10_1 << 1) | (off_11 << 11) | (off_19_12 << 12) | (off_20 << 20);
+    offset = (offset << 11) >> 11;  /* sign-extend from bit 20 */
+
+    insn_t decoded = { 0 };
+    decoded.rd = RD(data);
+    decoded.imm = offset;
+    return decoded;
+}
+
+/* Decode a B-type word: reassemble the 13-bit branch offset (bit 0 is always 0), plus rs1 and rs2. */
+static inline insn_t insn_btype_read(uint32_t data)
+{
+    uint32_t off_12   = (data >> 31) & 0x1;   /* offset[12]   */
+    uint32_t off_11   = (data >>  7) & 0x1;   /* offset[11]   */
+    uint32_t off_10_5 = (data >> 25) & 0x3f;  /* offset[10:5] */
+    uint32_t off_4_1  = (data >>  8) & 0xf;   /* offset[4:1]  */
+
+    int32_t offset = (off_4_1 << 1) | (off_10_5 << 5) | (off_11 << 11) | (off_12 << 12);
+    offset = (offset << 19) >> 19;  /* sign-extend from bit 12 */
+
+    insn_t decoded = { 0 };
+    decoded.rs1 = RS1(data);
+    decoded.rs2 = RS2(data);
+    decoded.imm = offset;
+    return decoded;
+}
+
+/* Decode an R-type word: three register fields, no immediate. */
+static inline insn_t insn_rtype_read(uint32_t data)
+{
+    insn_t decoded = { 0 };
+
+    decoded.rd = RD(data);
+    decoded.rs1 = RS1(data);
+    decoded.rs2 = RS2(data);
+    return decoded;
+}
+
+/* Decode an S-type word: 12-bit offset split across bits 31:25 and 11:7, plus rs1 and rs2. */
+static inline insn_t insn_stype_read(uint32_t data)
+{
+    uint32_t off_11_5 = (data >> 25) & 0x7f;  /* offset[11:5] */
+    uint32_t off_4_0  = (data >>  7) & 0x1f;  /* offset[4:0]  */
+
+    int32_t offset = off_4_0 | (off_11_5 << 5);
+    offset = (offset << 20) >> 20;  /* sign-extend from bit 11 */
+
+    insn_t decoded = { 0 };
+    decoded.rs1 = RS1(data);
+    decoded.rs2 = RS2(data);
+    decoded.imm = offset;
+    return decoded;
+}
+
+/* Decode a CSR-access word: CSR number from bits 31:20, plus rs1 and rd. */
+static inline insn_t insn_csrtype_read(uint32_t data)
+{
+    insn_t decoded = { 0 };
+
+    decoded.rd = RD(data);
+    decoded.rs1 = RS1(data);
+    decoded.csr = data >> 20;
+    return decoded;
+}
+
+/* Decode a four-register word: rd, rs1, rs2 and rs3 (rs3 from bits 31:27). */
+static inline insn_t insn_fprtype_read(uint32_t data)
+{
+    insn_t decoded = { 0 };
+
+    decoded.rd = RD(data);
+    decoded.rs1 = RS1(data);
+    decoded.rs2 = RS2(data);
+    decoded.rs3 = RS3(data);
+    return decoded;
+}
+
+/**
+ * Field extractors for the compressed (16-bit) instruction types.
+ * RP1/RP2 are 3-bit register fields (the decoders add 8 to them);
+ * RC1/RC2 are full 5-bit register fields.
+*/
+#define COPCODE(data)     (((data) >> 13) & 0x7 )  /* bits 15:13 */
+#define CFUNCT1(data)     (((data) >> 12) & 0x1 )  /* bit 12 */
+#define CFUNCT2LOW(data)  (((data) >>  5) & 0x3 )  /* bits 6:5 */
+#define CFUNCT2HIGH(data) (((data) >> 10) & 0x3 )  /* bits 11:10 */
+#define RP1(data)         (((data) >>  7) & 0x7 )  /* bits 9:7 */
+#define RP2(data)         (((data) >>  2) & 0x7 )  /* bits 4:2 */
+#define RC1(data)         (((data) >>  7) & 0x1f)  /* bits 11:7 */
+#define RC2(data)         (((data) >>  2) & 0x1f)  /* bits 6:2 */
+
+/* Decode a CA-format halfword: rd' (bits 9:7) and rs2' (bits 4:2), each offset by 8. */
+static inline insn_t insn_catype_read(uint16_t data)
+{
+    insn_t decoded = { 0 };
+
+    decoded.rvc = true;
+    decoded.rd = RP1(data) + 8;
+    decoded.rs2 = RP2(data) + 8;
+    return decoded;
+}
+
+/* Decode a CR-format halfword: full 5-bit rs1 (bits 11:7) and rs2 (bits 6:2). */
+static inline insn_t insn_crtype_read(uint16_t data)
+{
+    insn_t decoded = { 0 };
+
+    decoded.rvc = true;
+    decoded.rs1 = RC1(data);
+    decoded.rs2 = RC2(data);
+    return decoded;
+}
+
+/* Decode a CI-format halfword with a signed 6-bit immediate (bit 12 = imm[5], bits 6:2 = imm[4:0]) and a 5-bit rd. */
+static inline insn_t insn_citype_read(uint16_t data)
+{
+    uint32_t low5 = (data >>  2) & 0x1f;  /* imm[4:0] */
+    uint32_t top  = (data >> 12) & 0x1;   /* imm[5]   */
+
+    int32_t value = low5 | (top << 5);
+    value = (value << 26) >> 26;  /* sign-extend from bit 5 */
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rd = RC1(data);
+    decoded.imm = value;
+    return decoded;
+}
+
+/* Decode a CI-format halfword with an unsigned offset built from
+ * bits 4:2 -> imm[8:6], bits 6:5 -> imm[4:3], bit 12 -> imm[5]; rd is the 5-bit field. */
+static inline insn_t insn_citype_read2(uint16_t data)
+{
+    uint32_t off_8_6 = (data >>  2) & 0x7;
+    uint32_t off_4_3 = (data >>  5) & 0x3;
+    uint32_t off_5   = (data >> 12) & 0x1;
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rd = RC1(data);
+    decoded.imm = (int32_t)((off_4_3 << 3) | (off_5 << 5) | (off_8_6 << 6));
+    return decoded;
+}
+
+/* Decode a CI-format halfword with a signed 10-bit immediate laid out as
+ * bit 12 -> imm[9], bit 6 -> imm[4], bit 5 -> imm[6], bits 4:3 -> imm[8:7], bit 2 -> imm[5]. */
+static inline insn_t insn_citype_read3(uint16_t data)
+{
+    uint32_t b9   = (data >> 12) & 0x1;
+    uint32_t b8_7 = (data >>  3) & 0x3;
+    uint32_t b6   = (data >>  5) & 0x1;
+    uint32_t b5   = (data >>  2) & 0x1;
+    uint32_t b4   = (data >>  6) & 0x1;
+
+    int32_t value = (b4 << 4) | (b5 << 5) | (b6 << 6) | (b8_7 << 7) | (b9 << 9);
+    value = (value << 22) >> 22;  /* sign-extend from bit 9 */
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rd = RC1(data);
+    decoded.imm = value;
+    return decoded;
+}
+
+/* Decode a CI-format halfword with an unsigned offset built from
+ * bit 12 -> imm[5], bits 6:4 -> imm[4:2], bits 3:2 -> imm[7:6]; rd is the 5-bit field. */
+static inline insn_t insn_citype_read4(uint16_t data)
+{
+    uint32_t off_7_6 = (data >>  2) & 0x3;
+    uint32_t off_4_2 = (data >>  4) & 0x7;
+    uint32_t off_5   = (data >> 12) & 0x1;
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rd = RC1(data);
+    decoded.imm = (int32_t)((off_4_2 << 2) | (off_5 << 5) | (off_7_6 << 6));
+    return decoded;
+}
+
+/* Decode a CI-format halfword with a signed 18-bit immediate whose low 12 bits are zero
+ * (bits 6:2 -> imm[16:12], bit 12 -> imm[17]); rd is the 5-bit field. */
+static inline insn_t insn_citype_read5(uint16_t data)
+{
+    uint32_t hi_17    = (data >> 12) & 0x1;
+    uint32_t hi_16_12 = (data >>  2) & 0x1f;
+
+    int32_t value = (hi_17 << 17) | (hi_16_12 << 12);
+    value = (value << 14) >> 14;  /* sign-extend from bit 17 */
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rd = RC1(data);
+    decoded.imm = value;
+    return decoded;
+}
+
+/* Decode a CB-format branch halfword: signed 9-bit offset (bit 0 always 0) and rs1' offset by 8.
+ * Layout: bit 12 -> off[8], bits 11:10 -> off[4:3], bits 6:5 -> off[7:6], bits 4:3 -> off[2:1], bit 2 -> off[5]. */
+static inline insn_t insn_cbtype_read(uint16_t data)
+{
+    uint32_t off_8   = (data >> 12) & 0x1;
+    uint32_t off_7_6 = (data >>  5) & 0x3;
+    uint32_t off_5   = (data >>  2) & 0x1;
+    uint32_t off_4_3 = (data >> 10) & 0x3;
+    uint32_t off_2_1 = (data >>  3) & 0x3;
+
+    int32_t offset = (off_2_1 << 1) | (off_4_3 << 3) | (off_5 << 5) | (off_7_6 << 6) | (off_8 << 8);
+    offset = (offset << 23) >> 23;  /* sign-extend from bit 8 */
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rs1 = RP1(data) + 8;
+    decoded.imm = offset;
+    return decoded;
+}
+
+/* Decode a CB-format halfword with a signed 6-bit immediate (bit 12 = imm[5], bits 6:2 = imm[4:0])
+ * and a 3-bit rd' offset by 8. */
+static inline insn_t insn_cbtype_read2(uint16_t data)
+{
+    uint32_t low5 = (data >>  2) & 0x1f;  /* imm[4:0] */
+    uint32_t top  = (data >> 12) & 0x1;   /* imm[5]   */
+
+    int32_t value = low5 | (top << 5);
+    value = (value << 26) >> 26;  /* sign-extend from bit 5 */
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rd = RP1(data) + 8;
+    decoded.imm = value;
+    return decoded;
+}
+
+/* Decode a CS-format halfword with an unsigned offset built from
+ * bits 6:5 -> off[7:6] and bits 12:10 -> off[5:3]; rs1' and rs2' are offset by 8. */
+static inline insn_t insn_cstype_read(uint16_t data)
+{
+    uint32_t off_7_6 = (data >>  5) & 0x3;
+    uint32_t off_5_3 = (data >> 10) & 0x7;
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rs1 = RP1(data) + 8;
+    decoded.rs2 = RP2(data) + 8;
+    decoded.imm = (int32_t)((off_5_3 << 3) | (off_7_6 << 6));
+    return decoded;
+}
+
+/* Decode a CS-format halfword with an unsigned offset built from
+ * bit 5 -> off[6], bit 6 -> off[2] and bits 12:10 -> off[5:3]; rs1' and rs2' are offset by 8. */
+static inline insn_t insn_cstype_read2(uint16_t data)
+{
+    uint32_t off_6   = (data >>  5) & 0x1;
+    uint32_t off_5_3 = (data >> 10) & 0x7;
+    uint32_t off_2   = (data >>  6) & 0x1;
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rs1 = RP1(data) + 8;
+    decoded.rs2 = RP2(data) + 8;
+    decoded.imm = (int32_t)((off_2 << 2) | (off_5_3 << 3) | (off_6 << 6));
+    return decoded;
+}
+
+/* Decode a CJ-format halfword: signed 12-bit jump offset (bit 0 always 0), no register fields.
+ * Layout, bit 12 down to bit 2: off[11], off[4], off[9:8], off[10], off[6], off[7], off[3:1], off[5]. */
+static inline insn_t insn_cjtype_read(uint16_t data)
+{
+    uint32_t off_11  = (data >> 12) & 0x1;
+    uint32_t off_10  = (data >>  8) & 0x1;
+    uint32_t off_9_8 = (data >>  9) & 0x3;
+    uint32_t off_7   = (data >>  6) & 0x1;
+    uint32_t off_6   = (data >>  7) & 0x1;
+    uint32_t off_5   = (data >>  2) & 0x1;
+    uint32_t off_4   = (data >> 11) & 0x1;
+    uint32_t off_3_1 = (data >>  3) & 0x7;
+
+    int32_t offset = (off_3_1 << 1) | (off_4 << 4) | (off_5 << 5) | (off_6 << 6) |
+                     (off_7 << 7) | (off_9_8 << 8) | (off_10 << 10) | (off_11 << 11);
+    offset = (offset << 20) >> 20;  /* sign-extend from bit 11 */
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.imm = offset;
+    return decoded;
+}
+
+/* Decode a CL-format halfword with an unsigned offset built from
+ * bit 5 -> off[6], bit 6 -> off[2] and bits 12:10 -> off[5:3]; rs1' and rd' are offset by 8. */
+static inline insn_t insn_cltype_read(uint16_t data)
+{
+    uint32_t off_6   = (data >>  5) & 0x1;
+    uint32_t off_5_3 = (data >> 10) & 0x7;
+    uint32_t off_2   = (data >>  6) & 0x1;
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rs1 = RP1(data) + 8;
+    decoded.rd  = RP2(data) + 8;
+    decoded.imm = (int32_t)((off_2 << 2) | (off_5_3 << 3) | (off_6 << 6));
+    return decoded;
+}
+
+/* Decode a CL-format halfword with an unsigned offset built from
+ * bits 6:5 -> off[7:6] and bits 12:10 -> off[5:3]; rs1' and rd' are offset by 8. */
+static inline insn_t insn_cltype_read2(uint16_t data)
+{
+    uint32_t off_7_6 = (data >>  5) & 0x3;
+    uint32_t off_5_3 = (data >> 10) & 0x7;
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rs1 = RP1(data) + 8;
+    decoded.rd  = RP2(data) + 8;
+    decoded.imm = (int32_t)((off_5_3 << 3) | (off_7_6 << 6));
+    return decoded;
+}
+
+/* Decode a CSS-format halfword with an unsigned offset built from
+ * bits 9:7 -> off[8:6] and bits 12:10 -> off[5:3]; rs2 is the 5-bit field in bits 6:2. */
+static inline insn_t insn_csstype_read(uint16_t data)
+{
+    uint32_t off_8_6 = (data >>  7) & 0x7;
+    uint32_t off_5_3 = (data >> 10) & 0x7;
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rs2 = RC2(data);
+    decoded.imm = (int32_t)((off_5_3 << 3) | (off_8_6 << 6));
+    return decoded;
+}
+
+/* Decode a CSS-format halfword with an unsigned offset built from
+ * bits 8:7 -> off[7:6] and bits 12:9 -> off[5:2]; rs2 is the 5-bit field in bits 6:2. */
+static inline insn_t insn_csstype_read2(uint16_t data)
+{
+    uint32_t off_7_6 = (data >> 7) & 0x3;
+    uint32_t off_5_2 = (data >> 9) & 0xf;
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rs2 = RC2(data);
+    decoded.imm = (int32_t)((off_5_2 << 2) | (off_7_6 << 6));
+    return decoded;
+}
+
+/* Decode a CIW-format halfword with an unsigned immediate built from
+ * bit 5 -> imm[3], bit 6 -> imm[2], bits 10:7 -> imm[9:6], bits 12:11 -> imm[5:4]; rd' is offset by 8. */
+static inline insn_t insn_ciwtype_read(uint16_t data)
+{
+    uint32_t val_9_6 = (data >>  7) & 0xf;
+    uint32_t val_5_4 = (data >> 11) & 0x3;
+    uint32_t val_3   = (data >>  5) & 0x1;
+    uint32_t val_2   = (data >>  6) & 0x1;
+
+    insn_t decoded = { 0 };
+    decoded.rvc = true;
+    decoded.rd = RP2(data) + 8;
+    decoded.imm = (int32_t)((val_2 << 2) | (val_3 << 3) | (val_5_4 << 4) | (val_9_6 << 6));
+    return decoded;
+}
+
+/* Name of the register held in insn field r: taken from fpnames when insn.f is set, gpnames otherwise.
+ * Parenthesized so the conditional stays intact wherever the macro is expanded. */
+#define RN(r) (insn.f ? fpnames[insn.r] : gpnames[insn.r])
+
+/* Each PRINT_* macro formats the local `insn` into the local `buff` and returns buff
+ * from the enclosing function. The macro name lists the operands in print order. */
+#define PRINT_none() snprintf(buff, sizeof(buff), "%s", insn.name); return buff
+#define PRINT_rd_rs1_rs2() snprintf(buff, sizeof(buff), "%s\t%s, %s, %s", insn.name, RN(rd), RN(rs1), RN(rs2)); return buff
+#define PRINT_rd_rs1_rs2_rs3() snprintf(buff, sizeof(buff), "%s\t%s, %s, %s, %s", insn.name, RN(rd), RN(rs1), RN(rs2), RN(rs3)); return buff
+#define PRINT_rd_rs1_imm() snprintf(buff, sizeof(buff), "%s\t%s, %s, %d", insn.name, RN(rd), RN(rs1), insn.imm); return buff
+#define PRINT_rd_rs1_immx() snprintf(buff, sizeof(buff), "%s\t%s, %s, 0x%x", insn.name, RN(rd), RN(rs1), insn.imm); return buff
+#define PRINT_rd_imm_rs1() snprintf(buff, sizeof(buff), "%s\t%s, %d(%s)", insn.name, RN(rd), insn.imm, RN(rs1)); return buff
+#define PRINT_rs2_imm_rs1() snprintf(buff, sizeof(buff), "%s\t%s, %d(%s)", insn.name, RN(rs2), insn.imm, RN(rs1)); return buff
+#define PRINT_rd_imm() snprintf(buff, sizeof(buff), "%s\t%s, %d", insn.name, RN(rd), insn.imm); return buff
+#define PRINT_rd_immx() snprintf(buff, sizeof(buff), "%s\t%s, 0x%x", insn.name, RN(rd), insn.imm); return buff
+#define PRINT_rs1_rs2_imm() snprintf(buff, sizeof(buff), "%s\t%s, %s, %d", insn.name, RN(rs1), RN(rs2), insn.imm); return buff
+#define PRINT_fd_fs1() snprintf(buff, sizeof(buff), "%s\t%s, %s", insn.name, fpnames[insn.rd], fpnames[insn.rs1]); return buff
+#define PRINT_xd_fs1() snprintf(buff, sizeof(buff), "%s\t%s, %s", insn.name, gpnames[insn.rd], fpnames[insn.rs1]); return buff
+#define PRINT_fd_xs1() snprintf(buff, sizeof(buff), "%s\t%s, %s", insn.name, fpnames[insn.rd], gpnames[insn.rs1]); return buff
+
+// TODO: display csr name
+/* CSR forms: rd is a register name (%s), then the numeric csr, then the source operand.
+ * The format strings carry one conversion per argument. */
+#define PRINT_rd_csr_rs1() snprintf(buff, sizeof(buff), "%s\t%s, %d, %s", insn.name, RN(rd), insn.csr, RN(rs1)); return buff
+#define PRINT_rd_csr_uimm() snprintf(buff, sizeof(buff), "%s\t%s, %d, %u", insn.name, RN(rd), insn.csr, (uint32_t)insn.imm); return buff
+
+const char* rv64_print(uint32_t data, uintptr_t addr)
+{
+    static char buff[200] = {0};
+
+    insn_t insn = { 0 };
+    uint32_t quadrant = QUADRANT(data);
+    switch (quadrant) {
+    case 0x0: {
+        uint32_t copcode = COPCODE(data);
+
+        switch (copcode) {
+        case 0x0: /* C.ADDI4SPN */
+            insn =  insn_ciwtype_read(data);
+            insn.rs1 = 2; /* sp */
+            insn.name = "addi";
+            assert(insn.imm != 0);
+            PRINT_rd_rs1_imm();
+        case 0x1: /* C.FLD */
+            insn =  insn_cltype_read2(data);
+            insn.name = "fld";
+            insn.f = true;
+            PRINT_rd_imm_rs1();
+        case 0x2: /* C.LW */
+            insn =  insn_cltype_read(data);
+            insn.name = "lw";
+            PRINT_rd_imm_rs1();
+        case 0x3: /* C.LD */
+            insn =  insn_cltype_read2(data);
+            insn.name = "ld";
+            PRINT_rd_imm_rs1();
+        case 0x5: /* C.FSD */
+            insn =  insn_cstype_read(data);
+            insn.name = "fsd";
+            insn.f = true;
+            PRINT_rs2_imm_rs1();
+        case 0x6: /* C.SW */
+            insn =  insn_cstype_read2(data);
+            insn.name = "sw";
+            PRINT_rd_imm_rs1();
+        case 0x7: /* C.SD */
+            insn =  insn_cstype_read(data);
+            insn.name = "sd";
+            PRINT_rs2_imm_rs1();
+        }
+    }
+    case 0x1: {
+        uint32_t copcode = COPCODE(data);
+
+        switch (copcode) {
+        case 0x0: /* C.ADDI */
+            insn =  insn_citype_read(data);
+            insn.rs1 = insn.rd;
+            insn.name = "addi";
+            PRINT_rd_rs1_imm();
+        case 0x1: /* C.ADDIW */
+            insn =  insn_citype_read(data);
+            assert(insn.rd != 0);
+            insn.rs1 = insn.rd;
+            insn.name = "addiw";
+            PRINT_rd_rs1_imm();
+        case 0x2: /* C.LI */
+            insn =  insn_citype_read(data);
+            insn.rs1 = 0;
+            insn.name = "addi";
+            PRINT_rd_rs1_imm();
+        case 0x3: {
+            int32_t rd = RC1(data);
+            if (rd == 2) { /* C.ADDI16SP */
+                insn =  insn_citype_read3(data);
+                assert(insn.imm != 0);
+                insn.rs1 = insn.rd;
+                insn.name = "addi";
+                PRINT_rd_rs1_imm();
+            } else { /* C.LUI */
+                insn =  insn_citype_read5(data);
+                assert(insn.imm != 0);
+                insn.name = "lui";
+                PRINT_rd_immx();
+            }
+        }
+        case 0x4: {
+            uint32_t cfunct2high = CFUNCT2HIGH(data);
+
+            switch (cfunct2high) {
+            case 0x0:   /* C.SRLI */
+            case 0x1:   /* C.SRAI */
+            case 0x2: { /* C.ANDI */
+                insn =  insn_cbtype_read2(data);
+                insn.rs1 = insn.rd;
+
+                if (cfunct2high == 0x0) {
+                    insn.name = "srli";
+                } else if (cfunct2high == 0x1) {
+                    insn.name = "srai";
+                } else {
+                    insn.name = "andi";
+                }
+                PRINT_rd_rs1_imm();
+            }
+            case 0x3: {
+                uint32_t cfunct1 = CFUNCT1(data);
+
+                switch (cfunct1) {
+                case 0x0: {
+                    uint32_t cfunct2low = CFUNCT2LOW(data);
+
+                    insn =  insn_catype_read(data);
+                    insn.rs1 = insn.rd;
+
+                    switch (cfunct2low) {
+                    case 0x0: /* C.SUB */
+                        insn.name = "sub";
+                        break;
+                    case 0x1: /* C.XOR */
+                        insn.name = "xor";
+                        break;
+                    case 0x2: /* C.OR */
+                        insn.name = "or";
+                        break;
+                    case 0x3: /* C.AND */
+                        insn.name = "and";
+                        break;
+                    }
+                    break;
+                }
+                case 0x1: {
+                    uint32_t cfunct2low = CFUNCT2LOW(data);
+
+                    insn =  insn_catype_read(data);
+                    insn.rs1 = insn.rd;
+
+                    switch (cfunct2low) {
+                    case 0x0: /* C.SUBW */
+                        insn.name = "subw";
+                        break;
+                    case 0x1: /* C.ADDW */
+                        insn.name = "addw";
+                        break;
+                    }
+                    break;
+                }
+                }
+                PRINT_rd_rs1_rs2();
+            }
+            }
+        }
+        case 0x5: /* C.J */
+            insn =  insn_cjtype_read(data);
+            insn.rd = 0;
+            insn.name = "jal";
+            PRINT_rd_imm();
+        case 0x6: /* C.BEQZ */
+        case 0x7: /* C.BNEZ */
+            insn =  insn_cbtype_read(data);
+            insn.rs2 = 0;
+            insn.name = copcode == 0x6 ? "beq" : "bne";
+            PRINT_rs1_rs2_imm();
+        }
+    }
+    case 0x2: {
+        uint32_t copcode = COPCODE(data);
+        switch (copcode) {
+        case 0x0: /* C.SLLI */
+            insn =  insn_citype_read(data);
+            insn.rs1 = insn.rd;
+            insn.name = "slli";
+            PRINT_rd_rs1_imm();
+        case 0x1: /* C.FLDSP */
+            insn =  insn_citype_read2(data);
+            insn.rs1 = 2; /* sp */
+            insn.f = true;
+            insn.name = "fld";
+            PRINT_rd_imm_rs1();
+        case 0x2: /* C.LWSP */
+            insn =  insn_citype_read4(data);
+            assert(insn.rd != 0);
+            insn.rs1 = 2; /* sp */
+            insn.name = "lw";
+            PRINT_rd_imm_rs1();
+        case 0x3: /* C.LDSP */
+            insn =  insn_citype_read2(data);
+            assert(insn.rd != 0);
+            insn.rs1 = 2; /* sp */
+            insn.name = "ld";
+            PRINT_rd_imm_rs1();
+        case 0x4: {
+            uint32_t cfunct1 = CFUNCT1(data);
+
+            switch (cfunct1) {
+            case 0x0: {
+                insn =  insn_crtype_read(data);
+
+                if (insn.rs2 == 0) { /* C.JR */
+                    assert(insn.rs1 != 0);
+                    insn.rd = 0;
+                    insn.name = "jalr";
+                    PRINT_rd_imm_rs1();
+                } else { /* C.MV */
+                    insn.rd = insn.rs1;
+                    insn.rs1 = 0;
+                    insn.name = "add";
+                    PRINT_rd_rs1_rs2();
+                }
+            }
+            case 0x1: {
+                insn =  insn_crtype_read(data);
+                if (insn.rs1 == 0 && insn.rs2 == 0) { /* C.EBREAK */
+                    insn.name = "ebreak";
+                    PRINT_none();
+                } else if (insn.rs2 == 0) { /* C.JALR */
+                    insn.rd = 1; /* ra */
+                    insn.name = "jalr";
+                    PRINT_rd_imm_rs1();
+                } else { /* C.ADD */
+                    insn.rd = insn.rs1;
+                    insn.name = "add";
+                    PRINT_rd_rs1_rs2();
+                }
+            }
+            }
+        }
+        case 0x5: /* C.FSDSP */
+            insn =  insn_csstype_read(data);
+            insn.rs1 = 2; /* sp */
+            insn.f = true;
+            insn.name = "fsd";
+            PRINT_rs2_imm_rs1();
+        case 0x6: /* C.SWSP */
+            insn =  insn_csstype_read2(data);
+            insn.rs1 = 2; /* sp */
+            insn.name = "sw";
+            PRINT_rs2_imm_rs1();
+        case 0x7: /* C.SDSP */
+            insn =  insn_csstype_read(data);
+            insn.rs1 = 2; /* sp */
+            insn.name = "sd";
+            PRINT_rs2_imm_rs1();
+        }
+    }
+    case 0x3: {
+        uint32_t opcode = OPCODE(data);
+        switch (opcode) {
+        case 0x0: {
+            uint32_t funct3 = FUNCT3(data);
+
+            insn =  insn_itype_read(data);
+            switch (funct3) {
+            case 0x0: /* LB */
+                insn.name = "lb";
+                break;
+            case 0x1: /* LH */
+                insn.name = "lh";
+                break;
+            case 0x2: /* LW */
+                insn.name = "lw";
+                break;
+            case 0x3: /* LD */
+                insn.name = "ld";
+                break;
+            case 0x4: /* LBU */
+                insn.name = "lbu";
+                break;
+            case 0x5: /* LHU */
+                insn.name = "lhu";
+                break;
+            case 0x6: /* LWU */
+                insn.name = "lwu";
+                break;
+            }
+            PRINT_rd_imm_rs1();
+        }
+        case 0x1: {
+            uint32_t funct3 = FUNCT3(data);
+
+            insn =  insn_itype_read(data);
+            switch (funct3) {
+            case 0x2: /* FLW */
+                insn.name = "flw";
+                insn.f = true;
+                break;
+            case 0x3: /* FLD */
+                insn.name = "fld";
+                insn.f = true;
+                break;
+            }
+            PRINT_rd_imm_rs1();
+        }
+        case 0x3: {
+            uint32_t funct3 = FUNCT3(data);
+
+            switch (funct3) {
+            case 0x0: { /* FENCE */
+                insn.name = "fence";
+                // TODO: handle pred succ
+                PRINT_none();
+            }
+            case 0x1: { /* FENCE.I */
+                insn.name = "fence.i";
+                PRINT_none();
+            }
+            }
+        }
+        case 0x4: {
+            int hex = 0;
+            uint32_t funct3 = FUNCT3(data);
+
+            insn =  insn_itype_read(data);
+            switch (funct3) {
+            case 0x0: /* ADDI */
+                insn.name = "addi";
+                break;
+            case 0x1: {
+                uint32_t imm116 = IMM116(data);
+                if (imm116 == 0) { /* SLLI */
+                    insn.name = "slli";
+                }
+                break;
+            }
+            case 0x2: /* SLTI */
+                insn.name = "slti";
+                break;
+            case 0x3: /* SLTIU */
+                insn.name = "sltiu";
+                break;
+            case 0x4: /* XORI */
+                insn.name = "xori";
+                hex = 1;
+                break;
+            case 0x5: {
+                uint32_t imm116 = IMM116(data);
+
+                if (imm116 == 0x0) { /* SRLI */
+                    insn.name = "srli";
+                } else if (imm116 == 0x10) { /* SRAI */
+                    insn.name = "srai";
+                }
+                break;
+            }
+            case 0x6: /* ORI */
+                insn.name = "ori";
+                hex = 1;
+                break;
+            case 0x7: /* ANDI */
+                insn.name = "andi";
+                hex = 1;
+                break;
+            }
+            if(hex) {
+                PRINT_rd_rs1_immx();
+            } else {
+                PRINT_rd_rs1_imm();
+            }
+        }
+        case 0x5: /* AUIPC */
+            insn =  insn_utype_read(data);
+            insn.name = "auipc";
+            PRINT_rd_imm();
+        case 0x6: {
+            uint32_t funct3 = FUNCT3(data);
+            uint32_t funct7 = FUNCT7(data);
+
+            insn =  insn_itype_read(data);
+
+            switch (funct3) {
+            case 0x0: /* ADDIW */
+                insn.name = "addiw";
+                break;
+            case 0x1: /* SLLIW */
+                assert(funct7 == 0);
+                insn.name = "slliw";
+                break;
+            case 0x5: {
+                switch (funct7) {
+                case 0x0: /* SRLIW */
+                    insn.name = "srliw";
+                    break;
+                case 0x20: /* SRAIW */
+                    insn.name = "sraiw";
+                    break;
+                }
+            }
+            }
+            PRINT_rd_rs1_rs2();
+        }
+        case 0x8: {
+            uint32_t funct3 = FUNCT3(data);
+
+            insn =  insn_stype_read(data);
+            switch (funct3) {
+            case 0x0: /* SB */
+                insn.name = "sb";
+                break;
+            case 0x1: /* SH */
+                insn.name = "sh";
+                break;
+            case 0x2: /* SW */
+                insn.name = "sw";
+                break;
+            case 0x3: /* SD */
+                insn.name = "sd";
+                break;
+            }
+            PRINT_rs2_imm_rs1();
+        }
+        case 0x9: {
+            uint32_t funct3 = FUNCT3(data);
+
+            insn =  insn_stype_read(data);
+            switch (funct3) {
+            case 0x2: /* FSW */
+                insn.name = "fsw";
+                insn.f = true;
+                break;
+            case 0x3: /* FSD */
+                insn.name = "fsd";
+                insn.f = true;
+                break;
+            }
+            PRINT_rs2_imm_rs1();
+        }
+        case 0xc: {
+            insn =  insn_rtype_read(data);
+
+            uint32_t funct3 = FUNCT3(data);
+            uint32_t funct7 = FUNCT7(data);
+
+            switch (funct7) {
+            case 0x0: {
+                switch (funct3) {
+                case 0x0: /* ADD */
+                    insn.name = "add";
+                    break;
+                case 0x1: /* SLL */
+                    insn.name = "sll";
+                    break;
+                case 0x2: /* SLT */
+                    insn.name = "slt";
+                    break;
+                case 0x3: /* SLTU */
+                    insn.name = "sltu";
+                    break;
+                case 0x4: /* XOR */
+                    insn.name = "xor";
+                    break;
+                case 0x5: /* SRL */
+                    insn.name = "srl";
+                    break;
+                case 0x6: /* OR */
+                    insn.name = "or";
+                    break;
+                case 0x7: /* AND */
+                    insn.name = "and";
+                    break;
+                }
+            }
+            break;
+            case 0x1: {
+                switch (funct3) {
+                case 0x0: /* MUL */
+                    insn.name = "mul";
+                    break;
+                case 0x1: /* MULH */
+                    insn.name = "mulh";
+                    break;
+                case 0x2: /* MULHSU */
+                    insn.name = "mulhsu";
+                    break;
+                case 0x3: /* MULHU */
+                    insn.name = "mulhu";
+                    break;
+                case 0x4: /* DIV */
+                    insn.name = "div";
+                    break;
+                case 0x5: /* DIVU */
+                    insn.name = "divu";
+                    break;
+                case 0x6: /* REM */
+                    insn.name = "rem";
+                    break;
+                case 0x7: /* REMU */
+                    insn.name = "remu";
+                    break;
+                }
+            }
+            break;
+            case 0x20: {
+                switch (funct3) {
+                case 0x0: /* SUB */
+                    insn.name = "sub";
+                    break;
+                case 0x5: /* SRA */
+                    insn.name = "sra";
+                    break;
+                }
+            }
+            break;
+            }
+            PRINT_rd_rs1_rs2();
+        }
+        case 0xd: /* LUI */
+            insn =  insn_utype_read(data);
+            insn.name = "lui";
+            PRINT_rd_immx();
+        case 0xe: {
+            insn =  insn_rtype_read(data);
+
+            uint32_t funct3 = FUNCT3(data);
+            uint32_t funct7 = FUNCT7(data);
+
+            switch (funct7) {
+            case 0x0: {
+                switch (funct3) {
+                case 0x0: /* ADDW */
+                    insn.name = "addw";
+                    break;
+                case 0x1: /* SLLW */
+                    insn.name = "sllw";
+                    break;
+                case 0x5: /* SRLW */
+                    insn.name = "srlw";
+                    break;
+                }
+            }
+            break;
+            case 0x1: {
+                switch (funct3) {
+                case 0x0: /* MULW */
+                    insn.name = "mulw";
+                    break;
+                case 0x4: /* DIVW */
+                    insn.name = "divw";
+                    break;
+                case 0x5: /* DIVUW */
+                    insn.name = "divuw";
+                    break;
+                case 0x6: /* REMW */
+                    insn.name = "remw";
+                    break;
+                case 0x7: /* REMUW */
+                    insn.name = "remuw";
+                    break;
+                }
+            }
+            break;
+            case 0x20: {
+                switch (funct3) {
+                case 0x0: /* SUBW */
+                    insn.name = "subw";
+                    break;
+                case 0x5: /* SRAW */
+                    insn.name = "sraw";
+                    break;
+                }
+            }
+            break;
+            }
+            PRINT_rd_rs1_rs2();
+        }
+        case 0x10: {
+            uint32_t funct2 = FUNCT2(data);
+
+            insn =  insn_fprtype_read(data);
+            switch (funct2) {
+            case 0x0: /* FMADD.S */
+                insn.name = "fmadd.s";
+                insn.f = true;
+                break;
+            case 0x1: /* FMADD.D */
+                insn.name = "fmadd.d";
+                insn.f = true;
+                break;
+            }
+            PRINT_rd_rs1_rs2();
+        }
+        case 0x11: {
+            uint32_t funct2 = FUNCT2(data);
+
+            insn =  insn_fprtype_read(data);
+            switch (funct2) {
+            case 0x0: /* FMSUB.S */
+                insn.name = "fmsub.s";
+                insn.f = true;
+                break;
+            case 0x1: /* FMSUB.D */
+                insn.name = "fmsub.d";
+                insn.f = true;
+                break;
+            }
+            PRINT_rd_rs1_rs2();
+        }
+        case 0x12: {
+            uint32_t funct2 = FUNCT2(data);
+
+            insn =  insn_fprtype_read(data);
+            switch (funct2) {
+            case 0x0: /* FNMSUB.S */
+                insn.name = "fnmsub.s";
+                insn.f = true;
+                break;
+            case 0x1: /* FNMSUB.D */
+                insn.name = "fnmsub.d";
+                insn.f = true;
+                break;
+            }
+            PRINT_rd_rs1_rs2_rs3();
+        }
+        case 0x13: {
+            uint32_t funct2 = FUNCT2(data);
+
+            insn =  insn_fprtype_read(data);
+            switch (funct2) {
+            case 0x0: /* FNMADD.S */
+                insn.name = "fnmadd.s";
+                insn.f = true;
+                break;
+            case 0x1: /* FNMADD.D */
+                insn.name = "fnmadd.d";
+                insn.f = true;
+                break;
+            }
+            PRINT_rd_rs1_rs2_rs3();
+        }
+        case 0x14: {
+            uint32_t funct7 = FUNCT7(data);
+
+            insn =  insn_rtype_read(data);
+            insn.f = true;
+            switch (funct7) {
+            case 0x0:  /* FADD.S */
+                insn.name = "fadd.s";
+                PRINT_rd_rs1_rs2();
+            case 0x1:  /* FADD.D */
+                insn.name = "fadd.d";
+                PRINT_rd_rs1_rs2();
+            case 0x4:  /* FSUB.S */
+                insn.name = "fsub.s";
+                PRINT_rd_rs1_rs2();
+            case 0x5:  /* FSUB.D */
+                insn.name = "fsub.d";
+                PRINT_rd_rs1_rs2();
+            case 0x8:  /* FMUL.S */
+                insn.name = "fmul.s";
+                PRINT_rd_rs1_rs2();
+            case 0x9:  /* FMUL.D */
+                insn.name = "fmul.d";
+                PRINT_rd_rs1_rs2();
+            case 0xc:  /* FDIV.S */
+                insn.name = "fdiv.s";
+                PRINT_rd_rs1_rs2();
+            case 0xd:  /* FDIV.D */
+                insn.name = "fdiv.d";
+                PRINT_rd_rs1_rs2();
+            case 0x10: {
+                uint32_t funct3 = FUNCT3(data);
+
+                switch (funct3) {
+                case 0x0: /* FSGNJ.S */
+                    insn.name = "fsgnj.s";
+                    break;
+                case 0x1: /* FSGNJN.S */
+                    insn.name = "fsgnjn.s";
+                    break;
+                case 0x2: /* FSGNJX.S */
+                    insn.name = "fsgnjx.s";
+                    break;
+                }
+                PRINT_rd_rs1_rs2();
+            }
+            case 0x11: {
+                uint32_t funct3 = FUNCT3(data);
+
+                switch (funct3) {
+                case 0x0: /* FSGNJ.D */
+                    insn.name = "fsgnj.d";
+                    break;
+                case 0x1: /* FSGNJN.D */
+                    insn.name = "fsgnjn.d";
+                    break;
+                case 0x2: /* FSGNJX.D */
+                    insn.name = "fsgnjx.d";
+                    break;
+                }
+                PRINT_rd_rs1_rs2();
+            }
+            case 0x14: {
+                uint32_t funct3 = FUNCT3(data);
+
+                switch (funct3) {
+                case 0x0: /* FMIN.S */
+                    insn.name = "fmin.s";
+                    break;
+                case 0x1: /* FMAX.S */
+                    insn.name = "fmax.s";
+                    break;
+                }
+                PRINT_rd_rs1_rs2();
+            }
+            case 0x15: {
+                uint32_t funct3 = FUNCT3(data);
+
+                switch (funct3) {
+                case 0x0: /* FMIN.D */
+                    insn.name = "fmin.d";
+                    break;
+                case 0x1: /* FMAX.D */
+                    insn.name = "fmax.d";
+                    break;
+                }
+                PRINT_rd_rs1_rs2();
+            }
+            case 0x20: /* FCVT.S.D */
+                assert(RS2(data) == 1);
+                insn.name = "fcvt.s.d";
+                PRINT_fd_fs1();
+            case 0x21: /* FCVT.D.S */
+                assert(RS2(data) == 0);
+                insn.name = "fcvt.d.s";
+                PRINT_fd_fs1();
+            case 0x2c: /* FSQRT.S */
+                assert(insn.rs2 == 0);
+                insn.name = "fsqrt.s";
+                PRINT_fd_fs1();
+            case 0x2d: /* FSQRT.D */
+                assert(insn.rs2 == 0);
+                insn.name = "fsqrt.d";
+                PRINT_fd_fs1();
+            case 0x50: {
+                uint32_t funct3 = FUNCT3(data);
+
+                switch (funct3) {
+                case 0x0: /* FLE.S */
+                    insn.name = "fle.s";
+                    break;
+                case 0x1: /* FLT.S */
+                    insn.name = "flt.s";
+                    break;
+                case 0x2: /* FEQ.S */
+                    insn.name = "feq.s";
+                    break;
+                }
+                PRINT_rd_rs1_rs2();
+            }
+            case 0x51: {
+                uint32_t funct3 = FUNCT3(data);
+
+                switch (funct3) {
+                case 0x0: /* FLE.D */
+                    insn.name = "fle.d";
+                    break;
+                case 0x1: /* FLT.D */
+                    insn.name = "flt.d";
+                    break;
+                case 0x2: /* FEQ.D */
+                    insn.name = "feq.d";
+                    break;
+                }
+                PRINT_rd_rs1_rs2();
+            }
+            case 0x60: {
+                uint32_t rs2 = RS2(data);
+
+                switch (rs2) {
+                case 0x0: /* FCVT.W.S */
+                    insn.name = "fcvt.w.s";
+                    break;
+                case 0x1: /* FCVT.WU.S */
+                    insn.name = "fcvt.wu.s";
+                    break;
+                case 0x2: /* FCVT.L.S */
+                    insn.name = "fcvt.l.s";
+                    break;
+                case 0x3: /* FCVT.LU.S */
+                    insn.name = "fcvt.lu.s";
+                    break;
+                }
+                PRINT_xd_fs1();
+            }
+            case 0x61: {
+                uint32_t rs2 = RS2(data);
+
+                switch (rs2) {
+                case 0x0: /* FCVT.W.D */
+                    insn.name = "fcvt.w.d";
+                    break;
+                case 0x1: /* FCVT.WU.D */
+                    insn.name = "fcvt.wu.d";
+                    break;
+                case 0x2: /* FCVT.L.D */
+                    insn.name = "fcvt.l.d";
+                    break;
+                case 0x3: /* FCVT.LU.D */
+                    insn.name = "fcvt.lu.d";
+                    break;
+                }
+                PRINT_xd_fs1();
+            }
+            case 0x68: {
+                uint32_t rs2 = RS2(data);
+
+                switch (rs2) {
+                case 0x0: /* FCVT.S.W */
+                    insn.name = "fcvt.s.w";
+                    break;
+                case 0x1: /* FCVT.S.WU */
+                    insn.name = "fcvt.s.wu";
+                    break;
+                case 0x2: /* FCVT.S.L */
+                    insn.name = "fcvt.s.l";
+                    break;
+                case 0x3: /* FCVT.S.LU */
+                    insn.name = "fcvt.s.lu";
+                    break;
+                }
+                PRINT_fd_xs1();
+            }
+            case 0x69: {
+                uint32_t rs2 = RS2(data);
+
+                switch (rs2) {
+                case 0x0: /* FCVT.D.W */
+                    insn.name = "fcvt.d.w";
+                    break;
+                case 0x1: /* FCVT.D.WU */
+                    insn.name = "fcvt.d.wu";
+                    break;
+                case 0x2: /* FCVT.D.L */
+                    insn.name = "fcvt.d.l";
+                    break;
+                case 0x3: /* FCVT.D.LU */
+                    insn.name = "fcvt.d.lu";
+                    break;
+                }
+                PRINT_xd_fs1();
+            }
+            case 0x70: {
+                assert(RS2(data) == 0);
+                uint32_t funct3 = FUNCT3(data);
+
+                switch (funct3) {
+                case 0x0: /* FMV.X.W */
+                    insn.name = "fmv.x.w";
+                    break;
+                case 0x1: /* FCLASS.S */
+                    insn.name = "fclass.s";
+                    break;
+                }
+                PRINT_xd_fs1();
+            }
+            case 0x71: {
+                assert(RS2(data) == 0);
+                uint32_t funct3 = FUNCT3(data);
+
+                switch (funct3) {
+                case 0x0: /* FMV.X.D */
+                    insn.name = "fmv.x.d";
+                    break;
+                case 0x1: /* FCLASS.D */
+                    insn.name = "fclass.d";
+                    break;
+                }
+                PRINT_xd_fs1();
+            }
+            case 0x78: /* FMV_W_X */
+                assert(RS2(data) == 0 && FUNCT3(data) == 0);
+                insn.name = "fmv.w.x";
+                PRINT_fd_xs1();
+            case 0x79: /* FMV_D_X */
+                assert(RS2(data) == 0 && FUNCT3(data) == 0);
+                insn.name = "fmv.d.x";
+                PRINT_fd_xs1();
+            }
+        }
+        case 0x18: {
+            insn =  insn_btype_read(data);
+
+            uint32_t funct3 = FUNCT3(data);
+            switch (funct3) {
+            case 0x0: /* BEQ */
+                insn.name = "beq";
+                break;
+            case 0x1: /* BNE */
+                insn.name = "bne";
+                break;
+            case 0x4: /* BLT */
+                insn.name = "blt";
+                break;
+            case 0x5: /* BGE */
+                insn.name = "bge";
+                break;
+            case 0x6: /* BLTU */
+                insn.name = "bltu";
+                break;
+            case 0x7: /* BGEU */
+                insn.name = "bgeu";
+                break;
+            }
+
+            PRINT_rs1_rs2_imm();
+        }
+        case 0x19: /* JALR */
+            insn =  insn_itype_read(data);
+            insn.name = "jalr";
+            PRINT_rd_imm_rs1();
+        case 0x1b: /* JAL */
+            insn =  insn_jtype_read(data);
+            insn.name = "jal";
+            PRINT_rd_imm();
+        case 0x1c: {
+            if (data == 0x73) { /* ECALL */
+                insn.name = "ecall";
+                PRINT_none();
+            }
+
+            uint32_t funct3 = FUNCT3(data);
+            insn =  insn_csrtype_read(data);
+            switch(funct3) {
+            case 0x1: /* CSRRW */
+                insn.name = "csrrw";
+                PRINT_rd_csr_rs1();
+            case 0x2: /* CSRRS */
+                insn.name = "csrrs";
+                PRINT_rd_csr_rs1();
+            case 0x3: /* CSRRC */
+                insn.name = "csrrc";
+                PRINT_rd_csr_rs1();
+            case 0x5: /* CSRRWI */
+                insn.name = "csrrwi";
+                PRINT_rd_csr_uimm();
+            case 0x6: /* CSRRSI */
+                insn.name = "csrrsi";
+                PRINT_rd_csr_uimm();
+            case 0x7: /* CSRRCI */
+                insn.name = "csrrci";
+                PRINT_rd_csr_uimm();
+            }
+        }
+        }
+    }
+    }
+
+    snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(data));
+    return buff;
+}
diff --git a/src/dynarec/rv64/rv64_printer.h b/src/dynarec/rv64/rv64_printer.h
new file mode 100644
index 00000000..e22a208f
--- /dev/null
+++ b/src/dynarec/rv64/rv64_printer.h
@@ -0,0 +1,8 @@
+#ifndef _RV64_PRINTER_H_
+#define _RV64_PRINTER_H_
+#include <stdint.h>  // uint32_t / uint64_t used by the prototype below
+#include<stdbool.h>
+// Render the 32-bit instruction word `data` as text; unrecognised words fall back to a byte-swapped hex dump followed by "???".
+const char* rv64_print(uint32_t data, uint64_t addr);  // NOTE(review): presumably `addr` is the instruction's address and the result points into a buffer reused by the next call -- confirm
+
+#endif //_RV64_PRINTER_H_
diff --git a/src/dynarec/rv64/rv64_prolog.S b/src/dynarec/rv64/rv64_prolog.S
new file mode 100644
index 00000000..229910b9
--- /dev/null
+++ b/src/dynarec/rv64/rv64_prolog.S
@@ -0,0 +1,53 @@
+//RISC-V (RV64) prologue for dynarec
+//Save stuff, prepare stack and register
+//called with pointer to emu as 1st parameter
+//and address to jump to as 2nd parameter
+
+.text
+.align 4
+
+.extern rv64_next
+
+.global rv64_prolog
+rv64_prolog:
+    //save the 14 registers stored below (ra, x8, x18-x27, f8, f9), one 8-byte slot each
+    addi    sp,  sp, -(8 * 14)
+    sd      ra, (sp)  // save ra
+    sd      x8, 8(sp) // save fp
+    sd      x18, 16(sp)
+    sd      x19, 24(sp)
+    sd      x20, 32(sp)
+    sd      x21, 40(sp)
+    sd      x22, 48(sp)
+    sd      x23, 56(sp)
+    sd      x24, 64(sp)
+    sd      x25, 72(sp)
+    sd      x26, 80(sp)
+    sd      x27, 88(sp)
+    fsd     f8, 96(sp)
+    fsd     f9, 104(sp)
+    // save f18-f27 too probably; NOTE(review): x9 (s1) gets no slot either -- confirm generated code never writes it
+    //setup emu -> register: a0 is the emu pointer; x16-x31 take its first 16 quadwords (presumably RAX..R15, as in the RV64 signal code -- confirm)
+    ld      x16, (a0)
+    ld      x17, 8(a0)
+    ld      x18, 16(a0)
+    ld      x19, 24(a0)
+    ld      x20, 32(a0)
+    ld      x21, 40(a0)
+    ld      x22, 48(a0)
+    ld      x23, 56(a0)
+    ld      x24, 64(a0)
+    ld      x25, 72(a0)
+    ld      x26, 80(a0)
+    ld      x27, 88(a0)
+    ld      x28, 96(a0)
+    ld      x29, 104(a0)
+    ld      x30, 112(a0)
+    ld      x31, 120(a0)
+    ld      x5, 128(a0)     //xFlags
+    ld      x6, 136(a0)     // xRIP
+    // setup xMASK: x7 ends up as 0x00000000FFFFFFFF
+    xori    x7, x0, -1      // x7 = all ones (x0 reads as zero)
+    srli    x7, x7, 32      // logical shift clears the upper 32 bits
+    // jump to block (address passed in a1); a0 has not been written, so it still holds the emu pointer
+    jalr    a1
diff --git a/src/emu/x64run.c b/src/emu/x64run.c
index 032614da..a2343ca3 100755
--- a/src/emu/x64run.c
+++ b/src/emu/x64run.c
@@ -424,10 +424,14 @@ x64emurun:
                 GB = EB->byte[0];
                 EB->byte[0] = tmp8u;
             } else {
+                #ifdef USE_CAS
+                GB = native_lock_xchg_b(EB, GB);
+                #else
                 do {
                     tmp8u = native_lock_read_b(EB);
                 } while(native_lock_write_b(EB, GB));
                 GB = tmp8u;
+                #endif
             }
             // dynarec use need it's own mecanism
 #else
@@ -461,10 +465,14 @@ x64emurun:
                 if(rex.w) {
                     GD->q[0] = native_lock_xchg(ED, GD->q[0]);
                 } else {
+                    #ifdef USE_CAS
+                    GD->q[0] = native_lock_xchg(ED, GD->dword[0]);
+                    #else
                     do {
                         tmp32u = native_lock_read_d(ED);
                     } while(native_lock_write_d(ED, GD->dword[0]));
                     GD->q[0] = tmp32u;
+                    #endif
                 }
             }
 #else
diff --git a/src/emu/x64run66f0.c b/src/emu/x64run66f0.c
index 7bf48a47..7b76fe32 100644
--- a/src/emu/x64run66f0.c
+++ b/src/emu/x64run66f0.c
@@ -34,6 +34,9 @@ uintptr_t Run66F0(x64emu_t *emu, rex_t rex, uintptr_t addr)
     int64_t tmp64s;
     uint64_t tmp64u, tmp64u2;
     reg64_t *oped, *opgd;
+    #ifdef USE_CAS
+    uint64_t tmpcas;
+    #endif
 
     opcode = F8;
     // REX prefix before the F0 are ignored
diff --git a/src/emu/x64runf0.c b/src/emu/x64runf0.c
index 13eed9a2..06465fa1 100644
--- a/src/emu/x64runf0.c
+++ b/src/emu/x64runf0.c
@@ -36,6 +36,9 @@ uintptr_t RunF0(x64emu_t *emu, rex_t rex, uintptr_t addr)
     int64_t tmp64s;

     uint64_t tmp64u, tmp64u2;

     reg64_t *oped, *opgd;

+    #ifdef USE_CAS

+    uint64_t tmpcas;

+    #endif

 

     opcode = F8;

     // REX prefix before the F0 are ignored

diff --git a/src/include/dynarec_rv64.h b/src/include/dynarec_rv64.h
new file mode 100644
index 00000000..9abb704b
--- /dev/null
+++ b/src/include/dynarec_rv64.h
@@ -0,0 +1,10 @@
+#ifndef __DYNAREC_RV64_H_
+#define __DYNAREC_RV64_H_
+#include <stdint.h>  // uintptr_t, so this header can be included on its own
+typedef struct dynablock_s dynablock_t;
+typedef struct x64emu_s x64emu_t;
+// Entry points of the RV64 dynarec; both are only declared here, not defined.
+void CancelBlock64(void);  // explicit (void): a real prototype, so calls with arguments are rejected
+void* FillBlock64(dynablock_t* block, uintptr_t addr);  // NOTE(review): presumably translates the x86-64 code at addr into block -- confirm
+
+#endif //__DYNAREC_RV64_H_
\ No newline at end of file
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index 0558a88a..9526f49b 100755
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -481,6 +481,25 @@ void copyUCTXreg2Emu(x64emu_t* emu, ucontext_t* p, uintptr_t ip) {
         emu->regs[_R15].q[0] = p->uc_mcontext.__gregs[30];
         emu->ip.q[0] = ip;
         emu->eflags.x64 = p->uc_mcontext.__gregs[31];
+#elif defined(RV64)
+        emu->regs[_AX].q[0] = p->uc_mcontext.__gregs[16];
+        emu->regs[_CX].q[0] = p->uc_mcontext.__gregs[17];
+        emu->regs[_DX].q[0] = p->uc_mcontext.__gregs[18];
+        emu->regs[_BX].q[0] = p->uc_mcontext.__gregs[19];
+        emu->regs[_SP].q[0] = p->uc_mcontext.__gregs[20];
+        emu->regs[_BP].q[0] = p->uc_mcontext.__gregs[21];
+        emu->regs[_SI].q[0] = p->uc_mcontext.__gregs[22];
+        emu->regs[_DI].q[0] = p->uc_mcontext.__gregs[23];
+        emu->regs[_R8].q[0] = p->uc_mcontext.__gregs[24];
+        emu->regs[_R9].q[0] = p->uc_mcontext.__gregs[25];
+        emu->regs[_R10].q[0] = p->uc_mcontext.__gregs[26];
+        emu->regs[_R11].q[0] = p->uc_mcontext.__gregs[27];
+        emu->regs[_R12].q[0] = p->uc_mcontext.__gregs[28];
+        emu->regs[_R13].q[0] = p->uc_mcontext.__gregs[29];
+        emu->regs[_R14].q[0] = p->uc_mcontext.__gregs[30];
+        emu->regs[_R15].q[0] = p->uc_mcontext.__gregs[31];
+        emu->ip.q[0] = ip;
+        emu->eflags.x64 = p->uc_mcontext.__gregs[5];
 #else
 #error  Unsupported architecture
 #endif
@@ -512,6 +531,13 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
     if(db) {
         frame = (uintptr_t)p->uc_mcontext.__gregs[12+_SP];
     }
+#elif defined(RV64)
+    ucontext_t *p = (ucontext_t *)ucntx;
+    void * pc = (void*)p->uc_mcontext.__gregs[0];
+    dynablock_t* db = (dynablock_t*)cur_db;//FindDynablockFromNativeAddress(pc);
+    if(db) {
+        frame = (uintptr_t)p->uc_mcontext.__gregs[16+_SP];
+    }
 #else
 #error Unsupported architecture
 #endif
@@ -604,6 +630,26 @@ void my_sigactionhandler_oldcode(int32_t sig, int simple, siginfo_t* info, void
         sigcontext->uc_mcontext.gregs[X64_R15] = p->uc_mcontext.__gregs[30];
         sigcontext->uc_mcontext.gregs[X64_RIP] = getX64Address(db, (uintptr_t)pc);
     }
+#elif defined(RV64)
+    if(db) {
+        sigcontext->uc_mcontext.gregs[X64_RAX] = p->uc_mcontext.__gregs[16];
+        sigcontext->uc_mcontext.gregs[X64_RCX] = p->uc_mcontext.__gregs[17];
+        sigcontext->uc_mcontext.gregs[X64_RDX] = p->uc_mcontext.__gregs[18];
+        sigcontext->uc_mcontext.gregs[X64_RBX] = p->uc_mcontext.__gregs[19];
+        sigcontext->uc_mcontext.gregs[X64_RSP] = p->uc_mcontext.__gregs[20];
+        sigcontext->uc_mcontext.gregs[X64_RBP] = p->uc_mcontext.__gregs[21];
+        sigcontext->uc_mcontext.gregs[X64_RSI] = p->uc_mcontext.__gregs[22];
+        sigcontext->uc_mcontext.gregs[X64_RDI] = p->uc_mcontext.__gregs[23];
+        sigcontext->uc_mcontext.gregs[X64_R8] = p->uc_mcontext.__gregs[24];
+        sigcontext->uc_mcontext.gregs[X64_R9] = p->uc_mcontext.__gregs[25];
+        sigcontext->uc_mcontext.gregs[X64_R10] = p->uc_mcontext.__gregs[26];
+        sigcontext->uc_mcontext.gregs[X64_R11] = p->uc_mcontext.__gregs[27];
+        sigcontext->uc_mcontext.gregs[X64_R12] = p->uc_mcontext.__gregs[28];
+        sigcontext->uc_mcontext.gregs[X64_R13] = p->uc_mcontext.__gregs[29];
+        sigcontext->uc_mcontext.gregs[X64_R14] = p->uc_mcontext.__gregs[30];
+        sigcontext->uc_mcontext.gregs[X64_R15] = p->uc_mcontext.__gregs[31];
+        sigcontext->uc_mcontext.gregs[X64_RIP] = getX64Address(db, (uintptr_t)pc);
+    }
 #else
 #error Unsupported architecture
 #endif
@@ -902,6 +948,13 @@ void my_box64signalhandler(int32_t sig, siginfo_t* info, void * ucntx)
                     ejb->emu->xmm[2].u128 = fpsimd->vregs[2];
                     ejb->emu->xmm[3].u128 = fpsimd->vregs[3];
                 }*/
+#elif defined(RV64)
+                /*if(fpsimd) {
+                    ejb->emu->xmm[0].u128 = fpsimd->vregs[0];
+                    ejb->emu->xmm[1].u128 = fpsimd->vregs[1];
+                    ejb->emu->xmm[2].u128 = fpsimd->vregs[2];
+                    ejb->emu->xmm[3].u128 = fpsimd->vregs[3];
+                }*/
 #else
 #error  Unsupported architecture
 #endif
@@ -1036,6 +1089,14 @@ exit(-1);
             x64pc = getX64Address(db, (uintptr_t)pc);
             rsp = (void*)p->uc_mcontext.__gregs[12+_SP];
         }
+#elif defined(RV64)
+        if(db && p->uc_mcontext.__gregs[10]>0x10000) {
+            emu = (x64emu_t*)p->uc_mcontext.__gregs[10];
+        }
+        if(db) {
+            x64pc = getX64Address(db, (uintptr_t)pc);
+            rsp = (void*)p->uc_mcontext.__gregs[16+_SP];
+        }
 #else
 #error Unsupported Architecture
 #endif //arch
@@ -1165,6 +1226,18 @@ exit(-1);
             for (int i=-4; i<4; ++i) {
                 printf_log(log_minimum, "%sRSP%c0x%02x:0x%016lx", (i%4)?" ":"\n", i<0?'-':'+', abs(i)*8, *(uintptr_t*)(rsp+i*8));
             }
+#elif defined(RV64)
+        if(db) {
+            shown_regs = 1;
+            for (int i=0; i<16; ++i) {
+                if(!(i%4)) printf_log(log_minimum, "\n");
+                printf_log(log_minimum, "%s:0x%016llx ", reg_name[i], p->uc_mcontext.__gregs[16+i]);
+            }
+        }
+        if(rsp!=addr)
+            for (int i=-4; i<4; ++i) {
+                printf_log(log_minimum, "%sRSP%c0x%02x:0x%016lx", (i%4)?" ":"\n", i<0?'-':'+', abs(i)*8, *(uintptr_t*)(rsp+i*8));
+            }
 #else
         #warning TODO
 #endif
@@ -1203,6 +1276,8 @@ void my_sigactionhandler(int32_t sig, siginfo_t* info, void * ucntx)
     void * pc = (void*)p->uc_mcontext.pc;
     #elif defined(LA464)
     void * pc = (void*)p->uc_mcontext.__pc;
+    #elif defined(RV64)
+    void * pc = (void*)p->uc_mcontext.__gregs[0];
     #else
     #error Unsupported architecture
     #endif
diff --git a/src/main.c b/src/main.c
index 8f956361..7a0c2b1b 100755
--- a/src/main.c
+++ b/src/main.c
@@ -353,6 +353,9 @@ HWCAP2_ECV
 #elif defined(LA464)
     printf_log(LOG_INFO, "Dynarec for LoongArch");
     printf_log(LOG_INFO, " PageSize:%zd ", box64_pagesize);
+#elif defined(RV64)
+    printf_log(LOG_INFO, "Dynarec for RISC-V");
+    printf_log(LOG_INFO, " PageSize:%zd ", box64_pagesize);
 #else
 #error Unsupported architecture
 #endif