author    Yang Liu <liuyang22@iscas.ac.cn>    2024-12-12 18:51:24 +0800
committer GitHub <noreply@github.com>         2024-12-12 11:51:24 +0100
commit    55d6971a23d43f3f80919b47bc54b46192a89040 (patch)
tree      1c962ac0032a6d1794543616730d8c856966db77 /src
parent    7168167400d7fbe60e0d9034d95bdbcbf302af99 (diff)
[RV64_DYNAREC] New register mapping (#2139)
* [RV64_DYNAREC] New register mapping

* Fix
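
Two patterns repeat throughout the diff below: the scratch register x9 is replaced by x7 under the new mapping, and the CALL/CALL_/CALL_S helper macros (plus the new CALL4/CALL6 variants) now receive the registers holding the call arguments, so call sites no longer stage them with an explicit MV. A minimal sketch of that second pattern, using hypothetical names (the real macros live in dynarec_rv64_helper.h and are not reproduced here):

    /* Hypothetical illustration only, not the box64 macro definitions. */
    static void sketch_call2(dynarec_rv64_t* dyn, int ninst, void* fn,
                             int reg_arg1, int reg_arg2)
    {
        // Move the caller-supplied registers into the first two argument
        // slots, skipping the move when the value is already in place
        // (or the slot is unused, signalled here by 0).
        if (reg_arg1 && reg_arg1 != x1) MV(x1, reg_arg1);
        if (reg_arg2 && reg_arg2 != x2) MV(x2, reg_arg2);
        // ...emit the actual native call to fn (omitted)...
    }

    /* Call-site change visible in the hunks below, e.g. for fpu_loadenv:
     *   before:  if (ed != x1) { MV(x1, ed); }
     *            MOV32w(x2, 0);
     *            CALL(fpu_loadenv, -1);
     *   after:   MOV32w(x2, 0);
     *            CALL(fpu_loadenv, -1, ed, x2);
     */
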
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00_0.c      4
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00_1.c      4
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00_2.c      8
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00_3.c     68
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_0f.c       68
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_64.c        8
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_66.c       38
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_660f.c      4
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_660f38.c   26
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_d9.c       36
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_db.c        9
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_dd.c        5
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_df.c        6
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_f0.c       46
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.c  116
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.h   76
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_private.h  12
-rw-r--r--  src/dynarec/rv64/rv64_emitter.h          84
-rw-r--r--  src/dynarec/rv64/rv64_epilog.S          119
-rw-r--r--  src/dynarec/rv64/rv64_mapping.h         119
-rw-r--r--  src/dynarec/rv64/rv64_next.S             83
-rw-r--r--  src/dynarec/rv64/rv64_printer.c         116
-rw-r--r--  src/dynarec/rv64/rv64_prolog.S          136
-rw-r--r--  src/libtools/signal32.c                  16
-rw-r--r--  src/libtools/signals.c                   70
25 files changed, 639 insertions(+), 638 deletions(-)
diff --git a/src/dynarec/rv64/dynarec_rv64_00_0.c b/src/dynarec/rv64/dynarec_rv64_00_0.c
index ed2dfde0..1a0290d6 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_0.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_0.c
@@ -541,7 +541,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETEB(x1, 0);
             GETGB(x2);
-            emit_cmp8(dyn, ninst, x1, x2, x9, x4, x5, x6);
+            emit_cmp8(dyn, ninst, x1, x2, x7, x4, x5, x6);
             break;
         case 0x39:
             INST_NAME("CMP Ed, Gd");
@@ -557,7 +557,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETEB(x1, 0);
             GETGB(x2);
-            emit_cmp8(dyn, ninst, x2, x1, x9, x4, x5, x6);
+            emit_cmp8(dyn, ninst, x2, x1, x7, x4, x5, x6);
             break;
         case 0x3B:
             INST_NAME("CMP Gd, Ed");
diff --git a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c
index 9940d5ec..bee9fa33 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_1.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_1.c
@@ -288,7 +288,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state
             GETIP(ip);
             STORE_XEMU_CALL(x3);
-            CALL(native_priv, -1);
+            CALL(native_priv, -1, 0, 0);
             LOAD_XEMU_CALL();
             jump_to_epilog(dyn, 0, xRIP, ninst);
             *need_epilog = 0;
@@ -300,7 +300,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state
             GETIP(ip);
             STORE_XEMU_CALL(x3);
-            CALL(native_priv, -1);
+            CALL(native_priv, -1, 0, 0);
             LOAD_XEMU_CALL();
             jump_to_epilog(dyn, 0, xRIP, ninst);
             *need_epilog = 0;
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 6472d643..24968c74 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -118,7 +118,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     u8 = F8;
                     if (u8) {
                         ADDI(x2, xZR, u8);
-                        emit_cmp8(dyn, ninst, x1, x2, x9, x4, x5, x6);
+                        emit_cmp8(dyn, ninst, x1, x2, x7, x4, x5, x6);
                     } else {
                         emit_cmp8_0(dyn, ninst, x1, x3, x4);
                     }
@@ -175,7 +175,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     else
                         i64 = F8S;
                     MOV64xw(x5, i64);
-                    emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x9);
+                    emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x7);
                     WBACK;
                     break;
                 case 3: // SBB
@@ -308,13 +308,13 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 ADDI(x4, xZR, 0xff);
                 SLL(x4, x4, x1);
                 NOT(x4, x4);
-                SLL(x9, gd, x1);
+                SLL(x7, gd, x1);
 
                 // do aligned ll/sc sequence, reusing x2 (ed might be x2 but is no longer needed)
                 MARKLOCK;
                 LR_W(x2, x6, 1, 1);
                 AND(x5, x2, x4);
-                OR(x5, x5, x9);
+                OR(x5, x5, x7);
                 SC_W(x5, x5, x6, 1, 1);
                 BNEZ_MARKLOCK(x5);
 
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index d4ff674a..8afa8442 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -63,7 +63,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     GETEB(x1, 1);
                     u8 = F8;
                     MOV32w(x2, u8);
-                    CALL_(rol8, ed, x3);
+                    CALL_(rol8, ed, x3, x1, x2);
                     EBBACK(x5, 0);
                     break;
                 case 1:
@@ -73,7 +73,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     GETEB(x1, 1);
                     u8 = F8;
                     MOV32w(x2, u8);
-                    CALL_(ror8, ed, x3);
+                    CALL_(ror8, ed, x3, x1, x2);
                     EBBACK(x5, 0);
                     break;
                 case 2:
@@ -84,7 +84,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     GETEB(x1, 1);
                     u8 = F8;
                     MOV32w(x2, u8);
-                    CALL_(rcl8, ed, x3);
+                    CALL_(rcl8, ed, x3, x1, x2);
                     EBBACK(x5, 0);
                     break;
                 case 3:
@@ -95,7 +95,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     GETEB(x1, 1);
                     u8 = F8;
                     MOV32w(x2, u8);
-                    CALL_(rcr8, ed, x3);
+                    CALL_(rcr8, ed, x3, x1, x2);
                     EBBACK(x5, 0);
                     break;
                 case 4:
@@ -196,7 +196,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     u8 = (F8) & (rex.w ? 0x3f : 0x1f);
                     MOV32w(x2, u8);
                     GETEDW(x4, x1, 0);
-                    CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4);
+                    CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4, x1, x2);
                     WBACK;
                     if (!wback && !rex.w) ZEROUP(ed);
                     break;
@@ -208,7 +208,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     u8 = (F8) & (rex.w ? 0x3f : 0x1f);
                     MOV32w(x2, u8);
                     GETEDW(x4, x1, 0);
-                    CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4);
+                    CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4, x1, x2);
                     WBACK;
                     if (!wback && !rex.w) ZEROUP(ed);
                     break;
@@ -446,7 +446,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         GETIP(ip + 1); // read the 0xCC
                         STORE_XEMU_CALL(x3);
                         ADDI(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip
-                        CALL_S(x64Int3, -1);
+                        CALL_S(x64Int3, -1, x1);
                         LOAD_XEMU_CALL();
                         addr += 8 + 8;
                         TABLE64(x3, addr); // expected return address
@@ -468,7 +468,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     BEQZ_MARK(x3);
                     GETIP(addr);
                     STORE_XEMU_CALL(x3);
-                    CALL(native_int3, -1);
+                    CALL(native_int3, -1, 0, 0);
                     LOAD_XEMU_CALL();
                     MARK;
                     jump_to_epilog(dyn, addr, 0, ninst);
@@ -487,7 +487,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 GETIP(ip); // priviledged instruction, IP not updated
                 STORE_XEMU_CALL(x3);
                 MOV32w(x1, u8);
-                CALL(native_int, -1);
+                CALL(native_int, -1, x1, 0);
                 LOAD_XEMU_CALL();
             } else if (u8 == 0x80) {
                 INST_NAME("32bits SYSCALL");
@@ -495,7 +495,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SMEND();
                 GETIP(addr);
                 STORE_XEMU_CALL(x3);
-                CALL_S(x86Syscall, -1);
+                CALL_S(x86Syscall, -1, 0);
                 LOAD_XEMU_CALL();
                 TABLE64(x3, addr); // expected return address
                 BNE_MARK(xRIP, x3);
@@ -509,7 +509,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state
                 GETIP(addr);
                 STORE_XEMU_CALL(x3);
-                CALL(native_int3, -1);
+                CALL(native_int3, -1, 0, 0);
                 LOAD_XEMU_CALL();
                 jump_to_epilog(dyn, 0, xRIP, ninst);
                 *need_epilog = 0;
@@ -519,7 +519,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state
                 GETIP(ip);                                        // priviledged instruction, IP not updated
                 STORE_XEMU_CALL(x3);
-                CALL(native_priv, -1);
+                CALL(native_priv, -1, 0, 0);
                 LOAD_XEMU_CALL();
                 jump_to_epilog(dyn, 0, xRIP, ninst);
                 *need_epilog = 0;
@@ -550,7 +550,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     }
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
-                    CALL_(rol8, ed, x3);
+                    CALL_(rol8, ed, x3, x1, x2);
                     EBBACK(x5, 0);
                     break;
                 case 1:
@@ -565,7 +565,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     }
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
-                    CALL_(ror8, ed, x3);
+                    CALL_(ror8, ed, x3, x1, x2);
                     EBBACK(x5, 0);
                     break;
                 case 2:
@@ -581,7 +581,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     READFLAGS(X_CF);
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
-                    CALL_(rcl8, ed, x3);
+                    CALL_(rcl8, ed, x3, x1, x2);
                     EBBACK(x5, 0);
                     break;
                 case 3:
@@ -597,7 +597,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     READFLAGS(X_CF);
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
-                    CALL_(rcr8, ed, x3);
+                    CALL_(rcr8, ed, x3, x1, x2);
                     EBBACK(x5, 0);
                     break;
                 case 4:
@@ -682,7 +682,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
                     MOV32w(x2, 1);
                     GETEDW(x4, x1, 0);
-                    CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4);
+                    CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4, x1, x2);
                     WBACK;
                     if (!wback && !rex.w) ZEROUP(ed);
                     break;
@@ -693,7 +693,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
                     MOV32w(x2, 1);
                     GETEDW(x4, x1, 0);
-                    CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4);
+                    CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4, x1, x2);
                     WBACK;
                     if (!wback && !rex.w) ZEROUP(ed);
                     break;
@@ -752,7 +752,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
                     ANDI(x2, xRCX, rex.w ? 0x3f : 0x1f);
                     GETEDW(x4, x1, 0);
-                    CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4);
+                    CALL_(rex.w ? ((void*)rcl64) : ((void*)rcl32), ed, x4, x1, x2);
                     WBACK;
                     if (!wback && !rex.w) ZEROUP(ed);
                     break;
@@ -763,7 +763,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
                     ANDI(x2, xRCX, rex.w ? 0x3f : 0x1f);
                     GETEDW(x4, x1, 0);
-                    CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4);
+                    CALL_(rex.w ? ((void*)rcr64) : ((void*)rcr32), ed, x4, x1, x2);
                     WBACK;
                     if (!wback && !rex.w) ZEROUP(ed);
                     break;
@@ -946,7 +946,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         GETIP_(dyn->insts[ninst].natcall); // read the 0xCC already
                         STORE_XEMU_CALL(x3);
                         ADDI(x1, xEmu, (uint32_t)offsetof(x64emu_t, ip)); // setup addr as &emu->ip
-                        CALL_S(x64Int3, -1);
+                        CALL_S(x64Int3, -1, x1);
                         LOAD_XEMU_CALL();
                         TABLE64(x3, dyn->insts[ninst].natcall);
                         ADDI(x3, x3, 2 + 8 + 8);
@@ -1074,7 +1074,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state
             GETIP(ip);
             STORE_XEMU_CALL(xRIP);
-            CALL(native_priv, -1);
+            CALL(native_priv, -1, 0, 0);
             LOAD_XEMU_CALL();
             jump_to_epilog(dyn, 0, xRIP, ninst);
             *need_epilog = 0;
@@ -1146,7 +1146,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION);
                     GETEB(x1, 0);
-                    CALL(div8, -1);
+                    CALL(div8, -1, x1, 0);
                     break;
                 case 7:
                     INST_NAME("IDIV Eb");
@@ -1154,7 +1154,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     SETFLAGS(X_ALL, SF_SET_DF, NAT_FLAGS_NOFUSION);
                     GETEB(x1, 0);
-                    CALL(idiv8, -1);
+                    CALL(idiv8, -1, x1, 0);
                     break;
             }
             break;
@@ -1268,14 +1268,14 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                             MESSAGE(LOG_INFO, "Divide by 0 hack\n");
                             GETIP(ip);
                             STORE_XEMU_CALL(x3);
-                            CALL(native_div0, -1);
+                            CALL(native_div0, -1, 0, 0);
                             LOAD_XEMU_CALL();
                         } else {
                             if (box64_dynarec_div0) {
                                 BNE_MARK3(ed, xZR);
                                 GETIP_(ip);
                                 STORE_XEMU_CALL(x3);
-                                CALL(native_div0, -1);
+                                CALL(native_div0, -1, 0, 0);
                                 CLEARIP();
                                 LOAD_XEMU_CALL();
                                 jump_to_epilog(dyn, 0, xRIP, ninst);
@@ -1303,7 +1303,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                                 BNE_MARK3(ed, xZR);
                                 GETIP_(ip);
                                 STORE_XEMU_CALL(x3);
-                                CALL(native_div0, -1);
+                                CALL(native_div0, -1, 0, 0);
                                 CLEARIP();
                                 LOAD_XEMU_CALL();
                                 jump_to_epilog(dyn, 0, xRIP, ninst);
@@ -1318,15 +1318,14 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                                 BNE_MARK3(ed, xZR);
                                 GETIP_(ip);
                                 STORE_XEMU_CALL(x3);
-                                CALL(native_div0, -1);
+                                CALL(native_div0, -1, 0, 0);
                                 CLEARIP();
                                 LOAD_XEMU_CALL();
                                 jump_to_epilog(dyn, 0, xRIP, ninst);
                                 MARK3;
                             }
                             BEQ_MARK(xRDX, xZR);
-                            if (ed != x1) { MV(x1, ed); }
-                            CALL(div64, -1);
+                            CALL(div64, -1, ed, 0);
                             B_NEXT_nocond;
                             MARK;
                             DIVU(x2, xRAX, ed);
@@ -1346,7 +1345,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                             BNE_MARK3(ed, xZR);
                             GETIP_(ip);
                             STORE_XEMU_CALL(x3);
-                            CALL(native_div0, -1);
+                            CALL(native_div0, -1, 0, 0);
                             CLEARIP();
                             LOAD_XEMU_CALL();
                             jump_to_epilog(dyn, 0, xRIP, ninst);
@@ -1369,7 +1368,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                                 BNE_MARK3(ed, xZR);
                                 GETIP_(ip);
                                 STORE_XEMU_CALL(x3);
-                                CALL(native_div0, -1);
+                                CALL(native_div0, -1, 0, 0);
                                 CLEARIP();
                                 LOAD_XEMU_CALL();
                                 jump_to_epilog(dyn, 0, xRIP, ninst);
@@ -1384,7 +1383,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                                 BNE_MARK3(ed, xZR);
                                 GETIP_(ip);
                                 STORE_XEMU_CALL(x3);
-                                CALL(native_div0, -1);
+                                CALL(native_div0, -1, 0, 0);
                                 CLEARIP();
                                 LOAD_XEMU_CALL();
                                 jump_to_epilog(dyn, 0, xRIP, ninst);
@@ -1399,8 +1398,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                             BNE_MARK3(x2, xZR);
                             BLT_MARK(xRAX, xZR);
                             MARK3;
-                            if (ed != x1) MV(x1, ed);
-                            CALL((void*)idiv64, -1);
+                            CALL((void*)idiv64, -1, ed, 0);
                             B_NEXT_nocond;
                             MARK;
                             DIV(x2, xRAX, ed);
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 9838d9b8..12f7fb03 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -91,7 +91,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         INST_NAME("RDTSCP");
                         NOTEST(x1);
                         if (box64_rdtsc) {
-                            CALL(ReadTSC, x3); // will return the u64 in x3
+                            CALL(ReadTSC, x3, 0, 0); // will return the u64 in x3
                         } else {
                             CSRRS(x3, xZR, 0xC01); // RDTIME
                         }
@@ -118,7 +118,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SMEND();
             GETIP(addr);
             STORE_XEMU_CALL(x3);
-            CALL_S(x64Syscall, -1);
+            CALL_S(x64Syscall, -1, 0);
             LOAD_XEMU_CALL();
             TABLE64(x3, addr); // expected return address
             BNE_MARK(xRIP, x3);
@@ -134,7 +134,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state
             GETIP(ip);
             STORE_XEMU_CALL(x3);
-            CALL(native_ud, -1);
+            CALL(native_ud, -1, 0, 0);
             LOAD_XEMU_CALL();
             jump_to_epilog(dyn, 0, xRIP, ninst);
             *need_epilog = 0;
@@ -146,7 +146,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SETFLAGS(X_ALL, SF_SET_NODF, NAT_FLAGS_NOFUSION); // Hack to set flags in "don't care" state
             GETIP(ip);
             STORE_XEMU_CALL(x3);
-            CALL(native_ud, -1);
+            CALL(native_ud, -1, 0, 0);
             LOAD_XEMU_CALL();
             jump_to_epilog(dyn, 0, xRIP, ninst);
             *need_epilog = 0;
@@ -416,7 +416,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("RDTSC");
             NOTEST(x1);
             if (box64_rdtsc) {
-                CALL(ReadTSC, x3); // will return the u64 in x3
+                CALL(ReadTSC, x3, 0, 0); // will return the u64 in x3
             } else {
                 CSRRS(x3, xZR, 0xC01); // RDTIME
             }
@@ -554,11 +554,11 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     for (int i = 0; i < 4; ++i) {
                         LBU(x3, gback, gdoffset + i * 2);
                         LB(x4, wback, fixedaddress + i * 2);
-                        MUL(x9, x3, x4);
+                        MUL(x7, x3, x4);
                         LBU(x3, gback, gdoffset + i * 2 + 1);
                         LB(x4, wback, fixedaddress + i * 2 + 1);
                         MUL(x3, x3, x4);
-                        ADD(x3, x3, x9);
+                        ADD(x3, x3, x7);
                         if (rv64_zbb) {
                             MIN(x3, x3, x5);
                             MAX(x3, x3, x6);
@@ -791,12 +791,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         ed = (nextop & 7) + (rex.b << 3);
                         sse_reflect_reg(dyn, ninst, x6, ed);
                         ADDI(x2, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        ed = x2;
                     } else {
                         SMREAD();
                         addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0);
-                        if (ed != x2) {
-                            MV(x2, ed);
-                        }
                     }
                     GETG;
                     sse_forget_reg(dyn, ninst, x6, gd);
@@ -804,22 +802,22 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     sse_reflect_reg(dyn, ninst, x6, 0);
                     switch (u8) {
                         case 0xC8:
-                            CALL(sha1nexte, -1);
+                            CALL(sha1nexte, -1, x1, ed);
                             break;
                         case 0xC9:
-                            CALL(sha1msg1, -1);
+                            CALL(sha1msg1, -1, x1, ed);
                             break;
                         case 0xCA:
-                            CALL(sha1msg2, -1);
+                            CALL(sha1msg2, -1, x1, ed);
                             break;
                         case 0xCB:
-                            CALL(sha256rnds2, -1);
+                            CALL(sha256rnds2, -1, x1, ed);
                             break;
                         case 0xCC:
-                            CALL(sha256msg1, -1);
+                            CALL(sha256msg1, -1, x1, ed);
                             break;
                         case 0xCD:
-                            CALL(sha256msg2, -1);
+                            CALL(sha256msg2, -1, x1, ed);
                             break;
                     }
                     break;
@@ -883,17 +881,17 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         ed = (nextop & 7) + (rex.b << 3);
                         sse_reflect_reg(dyn, ninst, x6, ed);
                         ADDI(x2, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        wback = x2;
                     } else {
                         SMREAD();
                         addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, 1);
-                        if (wback != x2) MV(x2, wback);
                     }
                     u8 = F8;
                     GETG;
                     sse_forget_reg(dyn, ninst, x6, gd);
                     ADDI(x1, xEmu, offsetof(x64emu_t, xmm[gd]));
                     MOV32w(x3, u8);
-                    CALL(sha1rnds4, -1);
+                    CALL4(sha1rnds4, -1, x1, wback, x3, 0);
                     break;
                 default:
                     DEFAULT;
@@ -1746,8 +1744,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0xA2:
             INST_NAME("CPUID");
             NOTEST(x1);
-            MV(A1, xRAX);
-            CALL_(my_cpuid, -1, 0);
+            CALL_(my_cpuid, -1, 0, xRAX, 0);
             // BX and DX are not synchronized during the call, so need to force the update
             LD(xRDX, xEmu, offsetof(x64emu_t, regs[_DX]));
             LD(xRBX, xEmu, offsetof(x64emu_t, regs[_BX]));
@@ -1890,8 +1887,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         SKIPTEST(x1);
                         fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, x3, &fixedaddress, rex, NULL, 0, 0);
-                        if (ed != x1) { MV(x1, ed); }
-                        CALL(rex.is32bits ? ((void*)fpu_fxsave32) : ((void*)fpu_fxsave64), -1);
+                        CALL(rex.is32bits ? ((void*)fpu_fxsave32) : ((void*)fpu_fxsave64), -1, ed, 0);
                         break;
                     case 1:
                         INST_NAME("FXRSTOR Ed");
@@ -1899,8 +1895,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         SKIPTEST(x1);
                         fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, x3, &fixedaddress, rex, NULL, 0, 0);
-                        if (ed != x1) { MV(x1, ed); }
-                        CALL(rex.is32bits ? ((void*)fpu_fxrstor32) : ((void*)fpu_fxrstor64), -1);
+                        CALL(rex.is32bits ? ((void*)fpu_fxrstor32) : ((void*)fpu_fxrstor64), -1, ed, 0);
                         break;
                     case 2:
                         INST_NAME("LDMXCSR Md");
@@ -1920,28 +1915,23 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         INST_NAME("XSAVE Ed");
                         MESSAGE(LOG_DUMP, "Need Optimization\n");
                         fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
-                        addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                        if (ed != x1) { MV(x1, ed); }
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
                         MOV32w(x2, rex.w ? 0 : 1);
-                        CALL((void*)fpu_xsave, -1);
+                        CALL((void*)fpu_xsave, -1, ed, x2);
                         break;
                     case 5:
                         INST_NAME("XRSTOR Ed");
                         MESSAGE(LOG_DUMP, "Need Optimization\n");
                         fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
-                        addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                        if (ed != x1) { MV(x1, ed); }
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
                         MOV32w(x2, rex.w ? 0 : 1);
-                        CALL((void*)fpu_xrstor, -1);
+                        CALL((void*)fpu_xrstor, -1, ed, x2);
                         break;
                     case 7:
                         INST_NAME("CLFLUSH Ed");
                         MESSAGE(LOG_DUMP, "Need Optimization?\n");
-                        addr = geted(dyn, addr, ninst, nextop, &wback, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                        if (wback != A1) {
-                            MV(A1, wback);
-                        }
-                        CALL_(native_clflush, -1, 0);
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
+                        CALL_(native_clflush, -1, 0, ed, 0);
                         break;
                     default:
                         DEFAULT;
@@ -2268,10 +2258,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETEB(x1, 0);
             GETGB(x2);
             if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2)))
-                MV(x9, ed);
+                MV(x7, ed);
             emit_add8(dyn, ninst, ed, gd, x4, x5, x6);
             if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2)))
-                MV(gd, x9);
+                MV(gd, x7);
             EBBACK(x5, 0);
             if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2)))
                 GBBACK(x5);
@@ -2283,10 +2273,10 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGD;
             GETED(0);
             if (ed != gd)
-                MV(x9, ed);
+                MV(x7, ed);
             emit_add32(dyn, ninst, rex, ed, gd, x4, x5, x6);
             if (ed != gd)
-                MVxw(gd, x9);
+                MVxw(gd, x7);
             WBACK;
             break;
         case 0xC2:
diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c
index 282f39ee..4b7aaa94 100644
--- a/src/dynarec/rv64/dynarec_rv64_64.c
+++ b/src/dynarec/rv64/dynarec_rv64_64.c
@@ -260,7 +260,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         i64 = F32S;
                     else
                         i64 = F8S;
-                    emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x9);
+                    emit_add32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x7);
                     WBACKO(x6);
                     break;
                 case 1: // OR
@@ -293,7 +293,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         i64 = F8S;
                     MOV64xw(x5, i64);
                     SD(x6, xEmu, offsetof(x64emu_t, scratch));
-                    emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x9);
+                    emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x7);
                     LD(x6, xEmu, offsetof(x64emu_t, scratch));
                     WBACKO(x6);
                     break;
@@ -311,7 +311,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     else
                         i64 = F8S;
                     MOV64xw(x5, i64);
-                    emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4, x9);
+                    emit_sbb32(dyn, ninst, rex, ed, x5, x3, x4, x7);
                     WBACKO(x6);
                     break;
                 case 4: // AND
@@ -341,7 +341,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         i64 = F32S;
                     else
                         i64 = F8S;
-                    emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x9);
+                    emit_sub32c(dyn, ninst, rex, ed, i64, x3, x4, x5, x7);
                     WBACKO(x6);
                     break;
                 case 6: // XOR
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index 80a28e13..b36614b8 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -344,7 +344,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             nextop = F8;
             GETGW(x2);
             GETEW(x1, 0);
-            emit_cmp16(dyn, ninst, x1, x2, x9, x4, x5, x6);
+            emit_cmp16(dyn, ninst, x1, x2, x7, x4, x5, x6);
             break;
         case 0x3B:
             INST_NAME("CMP Gw, Ew");
@@ -352,7 +352,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             nextop = F8;
             GETGW(x1);
             GETEW(x2, 0);
-            emit_cmp16(dyn, ninst, x1, x2, x9, x4, x5, x6);
+            emit_cmp16(dyn, ninst, x1, x2, x7, x4, x5, x6);
             break;
         case 0x3D:
             INST_NAME("CMP AX, Iw");
@@ -603,9 +603,9 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         u64 = (uint16_t)(int16_t)F8S;
                     if (u64) {
                         MOV64x(x2, u64);
-                        emit_cmp16(dyn, ninst, x1, x2, x9, x4, x5, x6);
+                        emit_cmp16(dyn, ninst, x1, x2, x7, x4, x5, x6);
                     } else
-                        emit_cmp16_0(dyn, ninst, x1, x9, x4);
+                        emit_cmp16_0(dyn, ninst, x1, x7, x4);
                     break;
                 default:
                     DEFAULT;
@@ -1052,7 +1052,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     GETEW(x1, 1);
                     u8 = F8;
                     MOV32w(x2, u8);
-                    CALL_(rol16, x1, x3);
+                    CALL_(rol16, x1, x3, x1, x2);
                     EWBACK;
                     break;
                 case 1:
@@ -1062,7 +1062,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     GETEW(x1, 1);
                     u8 = F8;
                     MOV32w(x2, u8);
-                    CALL_(ror16, x1, x3);
+                    CALL_(ror16, x1, x3, x1, x2);
                     EWBACK;
                     break;
                 case 2:
@@ -1073,7 +1073,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     GETEW(x1, 1);
                     u8 = F8;
                     MOV32w(x2, u8);
-                    CALL_(rcl16, x1, x3);
+                    CALL_(rcl16, x1, x3, x1, x2);
                     EWBACK;
                     break;
                 case 3:
@@ -1084,7 +1084,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     GETEW(x1, 1);
                     u8 = F8;
                     MOV32w(x2, u8);
-                    CALL_(rcr16, x1, x3);
+                    CALL_(rcr16, x1, x3, x1, x2);
                     EWBACK;
                     break;
                 case 4:
@@ -1161,7 +1161,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
                     GETEW(x1, 1);
-                    CALL_(rol16, x1, x3);
+                    CALL_(rol16, x1, x3, x1, x2);
                     EWBACK;
                     break;
                 case 1:
@@ -1175,7 +1175,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
                     GETEW(x1, 1);
-                    CALL_(ror16, x1, x3);
+                    CALL_(ror16, x1, x3, x1, x2);
                     EWBACK;
                     break;
                 case 2:
@@ -1190,7 +1190,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     READFLAGS(X_CF);
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
                     GETEW(x1, 1);
-                    CALL_(rcl16, x1, x3);
+                    CALL_(rcl16, x1, x3, x1, x2);
                     EWBACK;
                     break;
                 case 3:
@@ -1205,7 +1205,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     READFLAGS(X_CF);
                     SETFLAGS(X_OF | X_CF, SF_SET_DF, NAT_FLAGS_NOFUSION);
                     GETEW(x1, 1);
-                    CALL_(rcr16, x1, x3);
+                    CALL_(rcr16, x1, x3, x1, x2);
                     EWBACK;
                     break;
                 case 5:
@@ -1326,22 +1326,22 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     SET_DFNONE();
                     GETEW(x1, 0);
                     ZEXTH(x2, xRAX);
-                    SLLI(x9, xRDX, 48);
-                    SRLI(x9, x9, 32);
-                    OR(x2, x2, x9);
+                    SLLI(x7, xRDX, 48);
+                    SRLI(x7, x7, 32);
+                    OR(x2, x2, x7);
                     if (box64_dynarec_div0) {
                         BNE_MARK3(ed, xZR);
                         GETIP_(ip);
                         STORE_XEMU_CALL(x6);
-                        CALL(native_div0, -1);
+                        CALL(native_div0, -1, 0, 0);
                         CLEARIP();
                         LOAD_XEMU_CALL();
                         jump_to_epilog(dyn, 0, xRIP, ninst);
                         MARK3;
                     }
-                    DIVUW(x9, x2, ed);
+                    DIVUW(x7, x2, ed);
                     REMUW(x4, x2, ed);
-                    INSHz(xRAX, x9, x5, x6, 1, 1);
+                    INSHz(xRAX, x7, x5, x6, 1, 1);
                     INSHz(xRDX, x4, x5, x6, 0, 1);
                     break;
                 case 7:
@@ -1354,7 +1354,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         BNE_MARK3(ed, xZR);
                         GETIP_(ip);
                         STORE_XEMU_CALL(x6);
-                        CALL(native_div0, -1);
+                        CALL(native_div0, -1, 0, 0);
                         CLEARIP();
                         LOAD_XEMU_CALL();
                         jump_to_epilog(dyn, 0, xRIP, ninst);
diff --git a/src/dynarec/rv64/dynarec_rv64_660f.c b/src/dynarec/rv64/dynarec_rv64_660f.c
index f9547d8c..2bad6621 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f.c
@@ -1451,10 +1451,10 @@ uintptr_t dynarec64_660F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGW(x1);
             GETEW(x2, 0);
             if (!(MODREG && wback == TO_NAT(((nextop & 0x38) >> 3) + (rex.r << 3))))
-                MV(x9, ed);
+                MV(x7, ed);
             emit_add16(dyn, ninst, ed, gd, x4, x5, x6);
             if (!(MODREG && wback == TO_NAT(((nextop & 0x38) >> 3) + (rex.r << 3))))
-                MV(gd, x9);
+                MV(gd, x7);
             EWBACK;
             if (!(MODREG && wback == TO_NAT(((nextop & 0x38) >> 3) + (rex.r << 3))))
                 GWBACK;
diff --git a/src/dynarec/rv64/dynarec_rv64_660f38.c b/src/dynarec/rv64/dynarec_rv64_660f38.c
index 5ac15061..ca1146f0 100644
--- a/src/dynarec/rv64/dynarec_rv64_660f38.c
+++ b/src/dynarec/rv64/dynarec_rv64_660f38.c
@@ -197,11 +197,11 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                     for (int i = 0; i < 8; ++i) {
                         LBU(x3, gback, gdoffset + i * 2);
                         LB(x4, wback, fixedaddress + i * 2);
-                        MUL(x9, x3, x4);
+                        MUL(x7, x3, x4);
                         LBU(x3, gback, gdoffset + i * 2 + 1);
                         LB(x4, wback, fixedaddress + i * 2 + 1);
                         MUL(x3, x3, x4);
-                        ADD(x3, x3, x9);
+                        ADD(x3, x3, x7);
                         if (rv64_zbb) {
                             MIN(x3, x3, x5);
                             MAX(x3, x3, x6);
@@ -742,16 +742,16 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                         ed = (nextop & 7) + (rex.b << 3);
                         sse_reflect_reg(dyn, ninst, x6, ed);
                         ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        ed = x1;
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
-                        if (ed != x1) MV(x1, ed);
                     }
                     // prepare rest arguments
                     MV(x2, xRDX);
                     MV(x4, xRAX);
                     u8 = F8;
                     MOV32w(x5, u8);
-                    CALL(sse42_compare_string_explicit_len, x1);
+                    CALL6(sse42_compare_string_explicit_len, x1, ed, x2, x3, x4, x5, 0);
                     ZEROUP(x1);
                     BNEZ_MARK(x1);
                     MOV32w(xRCX, (u8 & 1) ? 8 : 16);
@@ -773,7 +773,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                     SSE_LOOP_MV_Q(x3);
                     sse_forget_reg(dyn, ninst, x6, gd);
                     MOV32w(x1, gd);
-                    CALL(native_aesimc, -1);
+                    CALL(native_aesimc, -1, x1, 0);
                     break;
                 case 0xDC:
                     INST_NAME("AESENC Gx, Ex"); // AES-NI
@@ -781,7 +781,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                     GETG;
                     sse_forget_reg(dyn, ninst, x6, gd);
                     MOV32w(x1, gd);
-                    CALL(native_aese, -1);
+                    CALL(native_aese, -1, x1, 0);
                     GETGX();
                     GETEX(x2, 0, 8);
                     SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
@@ -792,7 +792,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                     GETG;
                     sse_forget_reg(dyn, ninst, x6, gd);
                     MOV32w(x1, gd);
-                    CALL(native_aeselast, -1);
+                    CALL(native_aeselast, -1, x1, 0);
                     GETGX();
                     GETEX(x2, 0, 8);
                     SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
@@ -803,7 +803,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                     GETG;
                     sse_forget_reg(dyn, ninst, x6, gd);
                     MOV32w(x1, gd);
-                    CALL(native_aesd, -1);
+                    CALL(native_aesd, -1, x1, 0);
                     GETGX();
                     GETEX(x2, 0, 8);
                     SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
@@ -815,7 +815,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                     GETG;
                     sse_forget_reg(dyn, ninst, x6, gd);
                     MOV32w(x1, gd);
-                    CALL(native_aesdlast, -1);
+                    CALL(native_aesdlast, -1, x1, 0);
                     GETGX();
                     GETEX(x2, 0, 8);
                     SSE_LOOP_Q(x3, x4, XOR(x3, x3, x4));
@@ -1218,7 +1218,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                     }
                     u8 = F8;
                     MOV32w(x4, u8);
-                    CALL(native_pclmul, -1);
+                    CALL4(native_pclmul, -1, x1, x2, x3, x4);
                     break;
                 case 0x63:
                     INST_NAME("PCMPISTRI Gx, Ex, Ib");
@@ -1231,13 +1231,13 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                         ed = (nextop & 7) + (rex.b << 3);
                         sse_reflect_reg(dyn, ninst, x6, ed);
                         ADDI(x1, xEmu, offsetof(x64emu_t, xmm[ed]));
+                        ed = x1;
                     } else {
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 1);
-                        if (ed != x1) MV(x1, ed);
                     }
                     u8 = F8;
                     MOV32w(x3, u8);
-                    CALL(sse42_compare_string_implicit_len, x1);
+                    CALL4(sse42_compare_string_implicit_len, x1, ed, x2, x3, 0);
                     ZEROUP(x1);
                     BNEZ_MARK(x1);
                     MOV32w(xRCX, (u8 & 1) ? 8 : 16);
@@ -1271,7 +1271,7 @@ uintptr_t dynarec64_660F38(dynarec_rv64_t* dyn, uintptr_t addr, uint8_t opcode,
                     }
                     u8 = F8;
                     MOV32w(x4, u8);
-                    CALL(native_aeskeygenassist, -1);
+                    CALL4(native_aeskeygenassist, -1, x1, x2, x3, x4);
                     break;
                 default:
                     DEFAULT;
diff --git a/src/dynarec/rv64/dynarec_rv64_d9.c b/src/dynarec/rv64/dynarec_rv64_d9.c
index 44fe914d..4ea79815 100644
--- a/src/dynarec/rv64/dynarec_rv64_d9.c
+++ b/src/dynarec/rv64/dynarec_rv64_d9.c
@@ -210,7 +210,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_refresh(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x1);
-            CALL(fpu_fxam, -1); // should be possible inline, but is it worth it?
+            CALL(fpu_fxam, -1, 0, 0); // should be possible inline, but is it worth it?
             x87_unstackcount(dyn, ninst, x1, s0);
 #endif
             break;
@@ -266,7 +266,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_f2xm1, -1);
+            CALL(native_f2xm1, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xF1:
@@ -275,7 +275,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fyl2x, -1);
+            CALL(native_fyl2x, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
@@ -284,7 +284,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_ftan, -1);
+            CALL(native_ftan, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             X87_PUSH_OR_FAIL(v1, dyn, ninst, x1, EXT_CACHE_ST_F);
             if (ST_IS_F(0)) {
@@ -301,7 +301,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fpatan, -1);
+            CALL(native_fpatan, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
@@ -311,7 +311,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fxtract, -1);
+            CALL(native_fxtract, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xF5:
@@ -320,7 +320,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fprem1, -1);
+            CALL(native_fprem1, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xF6:
@@ -345,7 +345,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fprem, -1);
+            CALL(native_fprem, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xF9:
@@ -354,7 +354,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fyl2xp1, -1);
+            CALL(native_fyl2xp1, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             X87_POP_OR_FAIL(dyn, ninst, x3);
             break;
@@ -373,7 +373,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fsincos, -1);
+            CALL(native_fsincos, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFC:
@@ -422,7 +422,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             x87_forget(dyn, ninst, x1, x2, 0);
             x87_forget(dyn, ninst, x1, x2, 1);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fscale, -1);
+            CALL(native_fscale, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFE:
@@ -430,7 +430,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fsin, -1);
+            CALL(native_fsin, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
         case 0xFF:
@@ -438,7 +438,7 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_forget(dyn, ninst, x1, x2, 0);
             s0 = x87_stackcount(dyn, ninst, x3);
-            CALL(native_fcos, -1);
+            CALL(native_fcos, -1, 0, 0);
             x87_unstackcount(dyn, ninst, x3, s0);
             break;
 
@@ -494,11 +494,8 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE?
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                    if (ed != x1) {
-                        MV(x1, ed);
-                    }
                     MOV32w(x2, 0);
-                    CALL(fpu_loadenv, -1);
+                    CALL(fpu_loadenv, -1, ed, x2);
                     break;
                 case 5:
                     INST_NAME("FLDCW Ew");
@@ -510,11 +507,8 @@ uintptr_t dynarec64_D9(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     fpu_purgecache(dyn, ninst, 0, x1, x2, x3); // maybe only x87, not SSE?
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                    if (ed != x1) {
-                        MV(x1, ed);
-                    }
                     MOV32w(x2, 0);
-                    CALL(fpu_savenv, -1);
+                    CALL(fpu_savenv, -1, ed, x2);
                     break;
                 case 7:
                     INST_NAME("FNSTCW Ew");
diff --git a/src/dynarec/rv64/dynarec_rv64_db.c b/src/dynarec/rv64/dynarec_rv64_db.c
index 70c77965..6943eac7 100644
--- a/src/dynarec/rv64/dynarec_rv64_db.c
+++ b/src/dynarec/rv64/dynarec_rv64_db.c
@@ -140,7 +140,7 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             INST_NAME("FNINIT");
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_purgecache(dyn, ninst, 0, x1, x2, x3);
-            CALL(reset_fpu, -1);
+            CALL(reset_fpu, -1, 0, 0);
             break;
         case 0xE8:
         case 0xE9:
@@ -268,7 +268,7 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 ADDI(x1, ed, fixedaddress);
                             }
                             X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x3);
-                            CALL(native_fld, -1);
+                            CALL(native_fld, -1, x1, 0);
                         }
                     }
                     break;
@@ -281,11 +281,8 @@ uintptr_t dynarec64_DB(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     } else {
                         x87_forget(dyn, ninst, x1, x3, 0);
                         addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                        if (ed != x1) {
-                            MV(x1, ed);
-                        }
                         s0 = x87_stackcount(dyn, ninst, x3);
-                        CALL(native_fstp, -1);
+                        CALL(native_fstp, -1, ed, 0);
                         x87_unstackcount(dyn, ninst, x3, s0);
                     }
                     X87_POP_OR_FAIL(dyn, ninst, x3);
diff --git a/src/dynarec/rv64/dynarec_rv64_dd.c b/src/dynarec/rv64/dynarec_rv64_dd.c
index fccc97d5..2d2c22d4 100644
--- a/src/dynarec/rv64/dynarec_rv64_dd.c
+++ b/src/dynarec/rv64/dynarec_rv64_dd.c
@@ -55,7 +55,7 @@ uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             MESSAGE(LOG_DUMP, "Need Optimization\n");
             x87_purgecache(dyn, ninst, 0, x1, x2, x3);
             MOV32w(x1, nextop & 7);
-            CALL(fpu_do_free, -1);
+            CALL(fpu_do_free, -1, x1, 0);
             break;
         case 0xD0:
         case 0xD1:
@@ -202,8 +202,7 @@ uintptr_t dynarec64_DD(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     MESSAGE(LOG_DUMP, "Need Optimization\n");
                     fpu_purgecache(dyn, ninst, 0, x1, x2, x3);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x4, x6, &fixedaddress, rex, NULL, 0, 0);
-                    if (ed != x1) { MV(x1, ed); }
-                    CALL(native_fsave, -1);
+                    CALL(native_fsave, -1, ed, 0);
                     break;
                 case 7:
                     INST_NAME("FNSTSW m2byte");
diff --git a/src/dynarec/rv64/dynarec_rv64_df.c b/src/dynarec/rv64/dynarec_rv64_df.c
index 7a775c3c..119a2910 100644
--- a/src/dynarec/rv64/dynarec_rv64_df.c
+++ b/src/dynarec/rv64/dynarec_rv64_df.c
@@ -216,9 +216,8 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("FBLD ST0, tbytes");
                     X87_PUSH_EMPTY_OR_FAIL(dyn, ninst, x1);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                    if (ed != x1) { MV(x1, ed); }
                     s0 = x87_stackcount(dyn, ninst, x3);
-                    CALL(fpu_fbld, -1);
+                    CALL(fpu_fbld, -1, ed, 0);
                     x87_unstackcount(dyn, ninst, x3, s0);
                     break;
                 case 5:
@@ -253,9 +252,8 @@ uintptr_t dynarec64_DF(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("FBSTP tbytes, ST0");
                     x87_forget(dyn, ninst, x1, x2, 0);
                     addr = geted(dyn, addr, ninst, nextop, &ed, x1, x2, &fixedaddress, rex, NULL, 0, 0);
-                    if (ed != x1) { MV(x1, ed); }
                     s0 = x87_stackcount(dyn, ninst, x3);
-                    CALL(fpu_fbst, -1);
+                    CALL(fpu_fbst, -1, ed, 0);
                     x87_unstackcount(dyn, ninst, x3, s0);
                     X87_POP_OR_FAIL(dyn, ninst, x3);
                     break;
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c
index 26664a03..b4e529a7 100644
--- a/src/dynarec/rv64/dynarec_rv64_f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_f0.c
@@ -168,8 +168,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 SLL(x1, x1, x5);
                                 OR(x1, x1, x2);
                                 ANDI(x2, wback, ~0b11); // align to 32bit again
-                                SC_W(x9, x1, x2, 1, 1);
-                                BNEZ_MARKLOCK(x9);
+                                SC_W(x7, x1, x2, 1, 1);
+                                BNEZ_MARKLOCK(x7);
                                 // done
                                 MARK;
                                 UFLAG_IF { emit_cmp8(dyn, ninst, x6, x4, x1, x2, x3, x5); }
@@ -283,11 +283,11 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             ANDI(xFlags, xFlags, ~(1 << F_ZF));
                             if (rex.w) {
                                 // there is no atomic move on 16bytes, so implement it with mutex
-                                LD(x9, xEmu, offsetof(x64emu_t, context));
-                                ADDI(x9, x9, offsetof(box64context_t, mutex_16b));
+                                LD(x7, xEmu, offsetof(x64emu_t, context));
+                                ADDI(x7, x7, offsetof(box64context_t, mutex_16b));
                                 ADDI(x4, xZR, 1);
                                 MARK2;
-                                AMOSWAP_W(x4, x4, x9, 1, 1);
+                                AMOSWAP_W(x4, x4, x7, 1, 1);
                                 // x4 == 1 if locked
                                 BNEZ_MARK2(x4);
 
@@ -309,7 +309,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 SMDMB();
 
                                 // unlock
-                                AMOSWAP_W(xZR, xZR, x9, 1, 1);
+                                AMOSWAP_W(xZR, xZR, x7, 1, 1);
                             } else {
                                 SMDMB();
                                 AND(x3, xRAX, xMASK);
@@ -375,12 +375,12 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ANDI(x4, x5, 0xff);  // x4 = Ed.b[0]
                 ANDI(x5, x5, ~0xff); // x5 = clear Ed.b[0]
                 ADDW(x6, x4, x2);
-                ANDI(x9, xFlags, 1 << F_CF);
-                ADDW(x6, x6, x9); // x6 = adc
+                ANDI(x7, xFlags, 1 << F_CF);
+                ADDW(x6, x6, x7); // x6 = adc
                 ANDI(x6, x6, 0xff);
                 OR(x5, x5, x6);
-                SC_W(x9, x5, wback, 1, 1);
-                BNEZ_MARKLOCK(x9);
+                SC_W(x7, x5, wback, 1, 1);
+                BNEZ_MARKLOCK(x7);
                 B_MARK3_nocond;
                 MARK;
                 SLLI(x3, x3, 3);
@@ -391,9 +391,9 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SLL(x2, x2, x3);        // x2 = extented Gb
                 MARK2;
                 LR_W(x6, wback, 1, 1); // x6 = Ed
-                AND(x9, x6, x4);       // x9 = extended Ed.b[dest]
+                AND(x7, x6, x4);       // x7 = extended Ed.b[dest]
                 AND(x6, x6, x5);       // x6 = clear Ed.b[dest]
-                ADDW(x5, x9, x2);
+                ADDW(x5, x7, x2);
                 ANDI(x4, xFlags, 1 << F_CF);
                 SLL(x4, x4, x3);  // extented
                 ADDW(x5, x5, x4); // x5 = adc
@@ -402,7 +402,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 BNEZ_MARK2(x4);
                 IFXORNAT (X_ALL | X_PEND) {
                     SRLI(x2, x2, x3); // Gb
-                    SRLI(x4, x9, x3); // Eb
+                    SRLI(x4, x7, x3); // Eb
                 }
                 MARK3;
                 IFXORNAT (X_ALL | X_PEND) {
@@ -533,11 +533,11 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             i64 = F32S;
                         else
                             i64 = F8S;
-                        MOV64xw(x9, i64);
+                        MOV64xw(x7, i64);
                         ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
                         BNEZ_MARK3(x1);
                         // Aligned
-                        AMOADDxw(x1, x9, wback, 1, 1);
+                        AMOADDxw(x1, x7, wback, 1, 1);
                         B_MARK_nocond;
                         MARK3;
                         // Unaligned
@@ -545,7 +545,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks.
                         LDxw(x1, wback, 0);
                         LRxw(x6, x5, 1, 1);
-                        ADDxw(x4, x1, x9);
+                        ADDxw(x4, x1, x7);
                         SCxw(x3, x6, x5, 1, 1);
                         BNEZ_MARK2(x3);
                         SDxw(x4, wback, 0);
@@ -601,8 +601,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             i64 = F32S;
                         else
                             i64 = F8S;
-                        MOV64xw(x9, i64);
-                        AMOANDxw(x1, x9, wback, 1, 1);
+                        MOV64xw(x7, i64);
+                        AMOANDxw(x1, x7, wback, 1, 1);
                         IFXORNAT (X_ALL | X_PEND)
                             emit_and32c(dyn, ninst, rex, x1, i64, x3, x4);
                     }
@@ -627,11 +627,11 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             i64 = F32S;
                         else
                             i64 = F8S;
-                        MOV64xw(x9, i64);
+                        MOV64xw(x7, i64);
                         ANDI(x1, wback, (1 << (rex.w + 2)) - 1);
                         BNEZ_MARK3(x1);
                         // Aligned
-                        SUB(x4, xZR, x9);
+                        SUB(x4, xZR, x7);
                         AMOADDxw(x1, x4, wback, 1, 1);
                         B_MARK_nocond;
                         MARK3;
@@ -640,7 +640,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         MARK2; // Use MARK2 as a "MARKLOCK" since we're running out of marks.
                         LDxw(x1, wback, 0);
                         LRxw(x6, x5, 1, 1);
-                        SUBxw(x4, x1, x9);
+                        SUBxw(x4, x1, x7);
                         SCxw(x3, x6, x5, 1, 1);
                         BNEZ_MARK2(x3);
                         SDxw(x4, wback, 0);
@@ -670,8 +670,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             i64 = F32S;
                         else
                             i64 = F8S;
-                        MOV64xw(x9, i64);
-                        AMOXORxw(x1, x9, wback, 1, 1);
+                        MOV64xw(x7, i64);
+                        AMOXORxw(x1, x7, wback, 1, 1);
                         IFXORNAT (X_ALL | X_PEND)
                             emit_xor32c(dyn, ninst, rex, x1, i64, x3, x4);
                     }
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 4b53e81f..af55ce5e 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -585,24 +585,22 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
                 SLLI(x2, x2, 3);
                 ADD(x3, x3, x2);
             }
-            LD(x2, x3, 0); // LR_D(x2, x3, 1, 1);
+            LD(x2, x3, 0);
         }
     } else {
         uintptr_t p = getJumpTableAddress64(ip);
         MAYUSE(p);
         TABLE64(x3, p);
         GETIP_(ip);
-        LD(x2, x3, 0); // LR_D(x2, x3, 1, 1);
-    }
-    if (reg != A1) {
-        MV(A1, xRIP);
+        LD(x2, x3, 0);
     }
     CLEARIP();
-#ifdef HAVE_TRACE
-// MOVx(x3, 15);    no access to PC reg
-#endif
     SMEND();
+#ifdef HAVE_TRACE
+    JALR(xRA, x2);
+#else
     JALR((dyn->insts[ninst].x64.has_callret ? xRA : xZR), x2);
+#endif
 }
 
 void ret_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex)
@@ -779,7 +777,7 @@ void iret_to_epilog(dynarec_rv64_t* dyn, int ninst, int is64bits)
     CLEARIP();
 }
 
-void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg)
+void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg, int arg1, int arg2, int arg3, int arg4, int arg5, int arg6)
 {
     MAYUSE(fnc);
     if (savereg == 0)
@@ -791,40 +789,44 @@ void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
     fpu_pushcache(dyn, ninst, reg, 0);
     if (ret != -2) {
         SUBI(xSP, xSP, 16); // RV64 stack needs to be 16byte aligned
-        SD(xEmu, xSP, 0);
-        SD(savereg, xSP, 8);
-        // x5..x8, x10..x17, x28..x31 those needs to be saved by caller
-        STORE_REG(RAX);
-        STORE_REG(RCX);
+        SD(savereg, xSP, 0);
+        STORE_REG(RDI);
+        STORE_REG(RSI);
         STORE_REG(RDX);
-        STORE_REG(R12);
-        STORE_REG(R13);
-        STORE_REG(R14);
-        STORE_REG(R15);
+        STORE_REG(RCX);
+        STORE_REG(R8);
+        STORE_REG(R9);
+        STORE_REG(RAX);
         SD(xRIP, xEmu, offsetof(x64emu_t, ip));
     }
     TABLE64(reg, (uintptr_t)fnc);
+    MV(A0, xEmu);
+    if (arg1) MV(A1, arg1);
+    if (arg2) MV(A2, arg2);
+    if (arg3) MV(A3, arg3);
+    if (arg4) MV(A4, arg4);
+    if (arg5) MV(A5, arg5);
+    if (arg6) MV(A6, arg6);
     JALR(xRA, reg);
     if (ret >= 0) {
-        MV(ret, xEmu);
+        MV(ret, A0);
     }
-    if (ret != -2) {
-        LD(xEmu, xSP, 0);
-        LD(savereg, xSP, 8);
-        ADDI(xSP, xSP, 16);
+
+    LD(savereg, xSP, 0);
+    ADDI(xSP, xSP, 16);
 #define GO(A) \
     if (ret != x##A) { LOAD_REG(A); }
-        GO(RAX);
-        GO(RCX);
-        GO(RDX);
-        GO(R12);
-        GO(R13);
-        GO(R14);
-        GO(R15);
-        if (ret != xRIP)
-            LD(xRIP, xEmu, offsetof(x64emu_t, ip));
+    GO(RDI);
+    GO(RSI);
+    GO(RDX);
+    GO(RCX);
+    GO(R8);
+    GO(R9);
+    GO(RAX);
 #undef GO
-    }
+    if (ret != xRIP)
+        LD(xRIP, xEmu, offsetof(x64emu_t, ip));
+
     // regenerate mask
     XORI(xMASK, xZR, -1);
     SRLI(xMASK, xMASK, 32);
@@ -845,54 +847,16 @@ void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int sav
 void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w)
 {
     MAYUSE(fnc);
-    FLAGS_ADJUST_TO11(xFlags, xFlags, x3);
-    SD(xFlags, xEmu, offsetof(x64emu_t, eflags));
     fpu_pushcache(dyn, ninst, x3, 1);
-    // x5..x8, x10..x17, x28..x31 those needs to be saved by caller
-    // RDI, RSI, RDX, RCX, R8, R9 are used for function call
-    SUBI(xSP, xSP, 16);
-    SD(xEmu, xSP, 0);
-    SD(xRIP, xSP, 8); // RV64 stack needs to be 16byte aligned
-    STORE_REG(R12);
-    STORE_REG(R13);
-    STORE_REG(R14);
-    STORE_REG(R15);
-    /*
-    // float and double args
-    if (abs(w) > 1) {
-        MESSAGE(LOG_DUMP, "Getting %d XMM args\n", abs(w) - 1);
-        for (int i = 0; i < abs(w) - 1; ++i) {
-            sse_get_reg(dyn, ninst, x6, i, 0);
-        }
-    }
-    if (w < 0) {
-        MESSAGE(LOG_DUMP, "Return in XMM0\n");
-        sse_get_reg_empty(dyn, ninst, x6, 0, 0);
-    }
-    */
-    // prepare regs for native call
-    MV(A0, xRDI);
-    MV(A1, xRSI);
-    MV(A2, xRDX);
-    MV(A3, xRCX);
-    MV(A4, xR8);
-    MV(A5, xR9);
     // native call
-    TABLE64(xRAX, (uintptr_t)fnc); // using xRAX as scratch regs for call address
-    JALR(xRA, xRAX);
+    TABLE64(x3, (uintptr_t)fnc);
+    JALR(xRA, x3);
     // put return value in x64 regs
     if (w > 0) {
         MV(xRAX, A0);
         MV(xRDX, A1);
     }
     // all done, restore all regs
-    LD(xEmu, xSP, 0);
-    LD(xRIP, xSP, 8);
-    ADDI(xSP, xSP, 16);
-    LOAD_REG(R12);
-    LOAD_REG(R13);
-    LOAD_REG(R14);
-    LOAD_REG(R15);
     // regenerate mask
     XORI(xMASK, xZR, -1);
     SRLI(xMASK, xMASK, 32);
@@ -902,8 +866,6 @@ void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w)
         vector_vsetvli(dyn, ninst, x3, dyn->vector_sew, VECTOR_LMUL1, 1);
 
     fpu_popcache(dyn, ninst, x3, 1);
-    LD(xFlags, xEmu, offsetof(x64emu_t, eflags));
-    FLAGS_ADJUST_FROM11(xFlags, xFlags, x3);
     // SET_NODF();
 }
 
@@ -927,7 +889,7 @@ void grab_segdata(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, int reg, int s
         CBZ_MARKSEG(t1);
     }
     MOV64x(x1, segment);
-    call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 0, xFlags);
+    call_c(dyn, ninst, GetSegmentBaseEmu, t2, reg, 0, xFlags, x1, 0, 0, 0, 0, 0);
     MARKSEG;
     MESSAGE(LOG_DUMP, "----%s Offset\n", (segment == _FS) ? "FS" : "GS");
 }
@@ -2813,7 +2775,7 @@ static void flagsCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1)
             j64 = (GETMARKF2) - (dyn->native_size);
             BEQZ(s1, j64);
         }
-        CALL_(UpdateFlags, -1, 0);
+        CALL_(UpdateFlags, -1, 0, 0, 0);
         MARKF2;
     }
 }
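
For reference, the reworked call_c above spills only RDI, RSI, RDX, RCX, R8, R9 and RAX around a helper call: under the new mapping these are exactly the guest GPRs homed in the caller-saved argument registers a0-a6, while the remaining guest registers sit in callee-saved s-registers and survive the call untouched. A minimal C sketch of that reasoning (register numbers taken from rv64_mapping.h later in this patch; the sketch is illustrative, not part of the patch):

    #include <stdio.h>

    // Guest regs the new mapping places in RV64 argument registers (a0-a6 = x10-x16).
    // Argument registers are caller-saved, hence the STORE_REG/LOAD_REG set in call_c.
    // RBX, RBP, RSP, RIP, Flags and R10-R15 live in callee-saved s-registers instead.
    struct { const char* gpr; int nat; } spilled[] = {
        { "RDI", 10 }, { "RSI", 11 }, { "RDX", 12 }, { "RCX", 13 },
        { "R8", 14 }, { "R9", 15 }, { "RAX", 16 },
    };

    int main(void) {
        for (int i = 0; i < 7; ++i)
            printf("%-3s lives in x%d (a%d): caller-saved, spilled around the call\n",
                   spilled[i].gpr, spilled[i].nat, spilled[i].nat - 10);
        return 0;
    }
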
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index ee61c13d..d78761cc 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -679,13 +679,15 @@
 
 // CALL will use x6 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
-#define CALL(F, ret) call_c(dyn, ninst, F, x6, ret, 1, 0)
+#define CALL(F, ret, arg1, arg2)                          call_c(dyn, ninst, F, x6, ret, 1, 0, arg1, arg2, 0, 0, 0, 0)
+#define CALL4(F, ret, arg1, arg2, arg3, arg4)             call_c(dyn, ninst, F, x6, ret, 1, 0, arg1, arg2, arg3, arg4, 0, 0)
+#define CALL6(F, ret, arg1, arg2, arg3, arg4, arg5, arg6) call_c(dyn, ninst, F, x6, ret, 1, 0, arg1, arg2, arg3, arg4, arg5, arg6)
 // CALL_ will use x6 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2
-#define CALL_(F, ret, reg) call_c(dyn, ninst, F, x6, ret, 1, reg)
+#define CALL_(F, ret, reg, arg1, arg2) call_c(dyn, ninst, F, x6, ret, 1, reg, arg1, arg2, 0, 0, 0, 0)
 // CALL_S will use x6 for the call address. Return value can be put in ret (unless ret is -1)
 // R0 will not be pushed/popd if ret is -2. Flags are not save/restored
-#define CALL_S(F, ret) call_c(dyn, ninst, F, x6, ret, 0, 0)
+#define CALL_S(F, ret, arg1) call_c(dyn, ninst, F, x6, ret, 0, 0, arg1, 0, 0, 0, 0, 0)
 
 #define MARKi(i)    dyn->insts[ninst].mark[i] = dyn->native_size
 #define GETMARKi(i) dyn->insts[ninst].mark[i]
@@ -810,49 +812,29 @@
 #define LOAD_REG(A)  LD(x##A, xEmu, offsetof(x64emu_t, regs[_##A]))
 
 // Need to also store current value of some register, as they may be used by functions like setjmp
-#define STORE_XEMU_CALL(s0)                             \
-    if (rv64_xtheadmempair) {                           \
-        ADDI(s0, xEmu, offsetof(x64emu_t, regs[_RSP])); \
-        TH_SDD(xRDX, xRBX, xEmu, 1);                    \
-        TH_SDD(xRSP, xRBP, s0, 0);                      \
-        TH_SDD(xRSI, xRDI, s0, 1);                      \
-        TH_SDD(xR8, xR9, s0, 2);                        \
-        TH_SDD(xR10, xR11, s0, 3);                      \
-    } else {                                            \
-        STORE_REG(RBX);                                 \
-        STORE_REG(RDX);                                 \
-        STORE_REG(RSP);                                 \
-        STORE_REG(RBP);                                 \
-        STORE_REG(RDI);                                 \
-        STORE_REG(RSI);                                 \
-        STORE_REG(R8);                                  \
-        STORE_REG(R9);                                  \
-        STORE_REG(R10);                                 \
-        STORE_REG(R11);                                 \
-    }
+#define STORE_XEMU_CALL(s0) \
+    STORE_REG(RBX);         \
+    STORE_REG(RSP);         \
+    STORE_REG(RBP);         \
+    STORE_REG(R10);         \
+    STORE_REG(R11);         \
+    STORE_REG(R12);         \
+    STORE_REG(R13);         \
+    STORE_REG(R14);         \
+    STORE_REG(R15);
 
 #define LOAD_XEMU_CALL()
 
-#define LOAD_XEMU_REM(s0)                               \
-    if (rv64_xtheadmempair) {                           \
-        ADDI(s0, xEmu, offsetof(x64emu_t, regs[_RSP])); \
-        TH_LDD(xRDX, xRBX, xEmu, 1);                    \
-        TH_LDD(xRSP, xRBP, s0, 0);                      \
-        TH_LDD(xRSI, xRDI, s0, 1);                      \
-        TH_LDD(xR8, xR9, s0, 2);                        \
-        TH_LDD(xR10, xR11, s0, 3);                      \
-    } else {                                            \
-        LOAD_REG(RBX);                                  \
-        LOAD_REG(RDX);                                  \
-        LOAD_REG(RSP);                                  \
-        LOAD_REG(RBP);                                  \
-        LOAD_REG(RDI);                                  \
-        LOAD_REG(RSI);                                  \
-        LOAD_REG(R8);                                   \
-        LOAD_REG(R9);                                   \
-        LOAD_REG(R10);                                  \
-        LOAD_REG(R11);                                  \
-    }
+#define LOAD_XEMU_REM(s0) \
+    LOAD_REG(RBX);        \
+    LOAD_REG(RSP);        \
+    LOAD_REG(RBP);        \
+    LOAD_REG(R10);        \
+    LOAD_REG(R11);        \
+    LOAD_REG(R12);        \
+    LOAD_REG(R13);        \
+    LOAD_REG(R14);        \
+    LOAD_REG(R15);
 
 
 #define SET_DFNONE()                               \
@@ -869,7 +851,7 @@
         MOV_U12(S, (N));                                                                                                        \
         SW(S, xEmu, offsetof(x64emu_t, df));                                                                                    \
         if (dyn->f.pending == SF_PENDING && dyn->insts[ninst].x64.need_after && !(dyn->insts[ninst].x64.need_after & X_PEND)) { \
-            CALL_(UpdateFlags, -1, 0);                                                                                          \
+            CALL_(UpdateFlags, -1, 0, 0, 0);                                                                                    \
             dyn->f.pending = SF_SET;                                                                                            \
             SET_NODF();                                                                                                         \
         }                                                                                                                       \
@@ -1013,7 +995,7 @@
             j64 = (GETMARKF) - (dyn->native_size);  \
             BEQ(x3, xZR, j64);                      \
         }                                           \
-        CALL_(UpdateFlags, -1, 0);                  \
+        CALL_(UpdateFlags, -1, 0, 0, 0);            \
         MARKF;                                      \
         dyn->f.pending = SF_SET;                    \
         SET_DFOK();                                 \
@@ -1169,7 +1151,7 @@
 
 void rv64_epilog(void);
 void rv64_epilog_fast(void);
-void* rv64_next(x64emu_t* emu, uintptr_t addr);
+void* rv64_next(void);
 
 #ifndef STEPNAME
 #define STEPNAME3(N, M) N##M
@@ -1394,7 +1376,7 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst, int is3
 void ret_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex);
 void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex, int n);
 void iret_to_epilog(dynarec_rv64_t* dyn, int ninst, int is64bits);
-void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
+void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg, int arg1, int arg2, int arg3, int arg4, int arg5, int arg6);
 void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w);
 void grab_segdata(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, int reg, int segment);
 void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6);
diff --git a/src/dynarec/rv64/dynarec_rv64_private.h b/src/dynarec/rv64/dynarec_rv64_private.h
index 126ce630..9e876fd3 100644
--- a/src/dynarec/rv64/dynarec_rv64_private.h
+++ b/src/dynarec/rv64/dynarec_rv64_private.h
@@ -200,12 +200,12 @@ int Table64(dynarec_rv64_t *dyn, uint64_t val, int pass);  // add a value to tab
 
 void CreateJmpNext(void* addr, void* next);
 
-#define GO_TRACE(A, B, s0)  \
-    GETIP(addr);            \
-    MV(A1, xRIP);           \
-    STORE_XEMU_CALL(s0);    \
-    MOV64x(A2, B);          \
-    CALL(A, -1);            \
+#define GO_TRACE(A, B, s0) \
+    GETIP(addr);           \
+    MV(x1, xRIP);          \
+    STORE_XEMU_CALL(s0);   \
+    MOV64x(x2, B);         \
+    CALL(A, -1, x1, x2);   \
     LOAD_XEMU_CALL()
 
 #endif //__DYNAREC_RV64_PRIVATE_H_
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 39e0b7bd..ecac8a46 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -1,85 +1,9 @@
 #ifndef __RV64_EMITTER_H__
 #define __RV64_EMITTER_H__
-/*
-    RV64 Emitter
-
-*/
-
-// RV64 ABI
-/*
-reg     name    description                     saver
-------------------------------------------------------
-x0      zero    Hard-wired zero                 —
-x1      ra      Return address                  Caller
-x2      sp      Stack pointer                   Callee
-x3      gp      Global pointer                  —
-x4      tp      Thread pointer                  —
-x5–7    t0–2    Temporaries                     Caller
-x8      s0/fp   Saved register/frame pointer    Callee
-x9      s1      Saved register                  Callee
-x10–11  a0–1    Function arguments/return val.  Caller
-x12–17  a2–7    Function arguments              Caller
-x18–27  s2–11   Saved registers                 Callee
-x28–31  t3–6    Temporaries                     Caller
--------------------------------------------------------
-f0–7    ft0–7   FP temporaries                  Caller
-f8–9    fs0–1   FP saved registers              Callee
-f10–11  fa0–1   FP arguments/return values      Caller
-f12–17  fa2–7   FP arguments                    Caller
-f18–27  fs2–11  FP saved registers              Callee
-f28–31  ft8–11  FP temporaries                  Caller
-*/
-// x86 Register mapping
-#define xRAX   16
-#define xRCX   17
-#define xRDX   18
-#define xRBX   19
-#define xRSP   20
-#define xRBP   21
-#define xRSI   22
-#define xRDI   23
-#define xR8    24
-#define xR9    25
-#define xR10   26
-#define xR11   27
-#define xR12   28
-#define xR13   29
-#define xR14   30
-#define xR15   31
-#define xFlags 8
-#define xRIP   7
-
-// convert a x86 register to native according to the register mapping
-#define TO_NAT(A) (xRAX + (A))
-
-// scratch registers
-#define x1 11
-#define x2 12
-#define x3 13
-#define x4 14
-#define x5 15
-#define x6 6
-#define x9 9
-// used to clear the upper 32bits
-#define xMASK 5
-
-// emu is r10
-#define xEmu 10
-// RV64 RA
-#define xRA 1
-#define xSP 2
-// RV64 args
-#define A0 10
-#define A1 11
-#define A2 12
-#define A3 13
-#define A4 14
-#define A5 15
-#define A6 16
-#define A7 17
-// xZR reg is 0
-#define xZR 0
-#define wZR xZR
+
+#include "rv64_mapping.h"
+
+// RV64 Emitter
 
 // replacement for F_OF internaly, using a reserved bit. Need to use F_OF2 internaly, never F_OF directly!
 #define F_OF2 F_res3
diff --git a/src/dynarec/rv64/rv64_epilog.S b/src/dynarec/rv64/rv64_epilog.S
index 1f819114..5c6c495a 100644
--- a/src/dynarec/rv64/rv64_epilog.S
+++ b/src/dynarec/rv64/rv64_epilog.S
@@ -1,7 +1,6 @@
-//riscv epilog for dynarec
-//Save stuff, prepare stack and register
-//called with pointer to emu as 1st parameter
-//and address to jump to as 2nd parameter
+#define ASM_MAPPING 1
+#include "rv64_mapping.h"
+#undef ASM_MAPPING
 
 .text
 .align 4
@@ -9,60 +8,70 @@
 .global rv64_epilog
 .global rv64_epilog_fast
 
+// rv64_epilog(void)
 rv64_epilog:
-    //update register -> emu
-    sd      x16, (a0)
-    sd      x17, 8(a0)
-    sd      x18, 16(a0)
-    sd      x19, 24(a0)
-    sd      x20, 32(a0)
-    sd      x21, 40(a0)
-    sd      x22, 48(a0)
-    sd      x23, 56(a0)
-    sd      x24, 64(a0)
-    sd      x25, 72(a0)
-    sd      x26, 80(a0)
-    sd      x27, 88(a0)
-    sd      x28, 96(a0)
-    sd      x29, 104(a0)
-    sd      x30, 112(a0)
-    sd      x31, 120(a0)
+
     // adjust flags bit 5 -> bit 11
-    li      x5, ~(1<<11)
-    and     x8, x8, x5
-    andi    x5, x8, 1<<5
-    slli    x5, x5, 11-5
-    or      x8, x8, x5
-    sd      x8, 128(a0)     //xFlags
-    sd      x7, 136(a0)     // put back reg value in emu, including EIP (so x7 must be EIP now)
+    li      t3, ~(1<<11)
+    and     Flags, Flags, t3
+    andi    t3, Flags, 1<<5
+    slli    t3, t3, 11-5
+    or      Flags, Flags, t3
+
+    // spill x86 registers to emu
+    sd      RAX,   0(Emu)
+    sd      RCX,   8(Emu)
+    sd      RDX,   16(Emu)
+    sd      RBX,   24(Emu)
+    sd      RSP,   32(Emu)
+    sd      RBP,   40(Emu)
+    sd      RSI,   48(Emu)
+    sd      RDI,   56(Emu)
+    sd      R8,    64(Emu)
+    sd      R9,    72(Emu)
+    sd      R10,   80(Emu)
+    sd      R11,   88(Emu)
+    sd      R12,   96(Emu)
+    sd      R13,   104(Emu)
+    sd      R14,   112(Emu)
+    sd      R15,   120(Emu)
+    sd      Flags, 128(Emu)
+    sd      RIP,   136(Emu)
+
+    /*** switch to native register naming convention ***/
+
     // fallback to epilog_fast now, just restoring saved regs
 rv64_epilog_fast:
-    ld      sp, 808(a0) // restore saved sp from emu->xSPSave, see rv64_prolog
-    ld      x9, -8(sp)
-    sd      x9, 808(a0) // put back old value
-    ld      ra, (sp)  // save ra
-    ld      x8, 8(sp) // save fp
-    ld      x18, (2*8)(sp)
-    ld      x19, (3*8)(sp)
-    ld      x20, (4*8)(sp)
-    ld      x21, (5*8)(sp)
-    ld      x22, (6*8)(sp)
-    ld      x23, (7*8)(sp)
-    ld      x24, (8*8)(sp)
-    ld      x25, (9*8)(sp)
-    ld      x26, (10*8)(sp)
-    ld      x27, (11*8)(sp)
-    ld      x9,  (12*8)(sp)
-    fld     f18, (13*8)(sp)
-    fld     f19, (14*8)(sp)
-    fld     f20, (15*8)(sp)
-    fld     f21, (16*8)(sp)
-    fld     f22, (17*8)(sp)
-    fld     f23, (19*8)(sp)
-    fld     f24, (19*8)(sp)
-    fld     f25, (20*8)(sp)
-    fld     f26, (21*8)(sp)
-    fld     f27, (22*8)(sp)
+    // restore saved sp from emu->xSPSave
+    ld      sp, 808(Emu)
+    ld      t3, -8(sp)
+    // put back old value
+    sd      t3, 808(Emu)
+
+    ld      ra,   (0*8)(sp)
+    ld      fp,   (1*8)(sp)
+    ld      s1,   (2*8)(sp)
+    ld      s2,   (3*8)(sp)
+    ld      s3,   (4*8)(sp)
+    ld      s4,   (5*8)(sp)
+    ld      s5,   (6*8)(sp)
+    ld      s6,   (7*8)(sp)
+    ld      s7,   (8*8)(sp)
+    ld      s8,   (9*8)(sp)
+    ld      s9,   (10*8)(sp)
+    ld      s10,  (11*8)(sp)
+    ld      s11,  (12*8)(sp)
+    fld     fs2,  (13*8)(sp)
+    fld     fs3,  (14*8)(sp)
+    fld     fs4,  (15*8)(sp)
+    fld     fs5,  (16*8)(sp)
+    fld     fs6,  (17*8)(sp)
+    fld     fs7,  (18*8)(sp)
+    fld     fs8,  (19*8)(sp)
+    fld     fs9,  (20*8)(sp)
+    fld     fs10, (21*8)(sp)
+    fld     fs11, (22*8)(sp)
+
+    // 16 bytes aligned
     addi    sp,  sp, (8 * 24)
-    //end, return...
     ret
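
The bit-5/bit-11 shuffle at the top of rv64_epilog (and its inverse in rv64_prolog below) moves the internally cached overflow flag between its scratch slot and its architectural position: per the F_OF2/F_res3 comment kept in rv64_emitter.h, the dynarec holds OF in a reserved EFLAGS bit, and the shift amounts used here imply that bit is bit 5. A rough C equivalent, as an illustrative sketch under that assumption:

    #include <stdint.h>

    // epilog direction: internal layout -> architectural EFLAGS (OF is bit 11)
    uint64_t flags_to_arch(uint64_t f) {
        f &= ~(1ULL << 11);            // li t3, ~(1<<11); and Flags, Flags, t3
        f |= ((f >> 5) & 1ULL) << 11;  // andi/slli/or: copy bit 5 up into bit 11
        return f;
    }

    // prolog direction: architectural EFLAGS -> internal layout
    uint64_t flags_to_internal(uint64_t f) {
        f &= ~(1ULL << 5);             // andi Flags, Flags, ~(1<<5)
        f |= ((f >> 11) & 1ULL) << 5;  // srli/andi/or: copy bit 11 down into bit 5
        return f;
    }

    int main(void) {
        uint64_t f = flags_to_internal(1ULL << 11);        // architectural OF set
        return ((flags_to_arch(f) >> 11) & 1ULL) ? 0 : 1;  // OF round-trips back to bit 11
    }
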
diff --git a/src/dynarec/rv64/rv64_mapping.h b/src/dynarec/rv64/rv64_mapping.h
new file mode 100644
index 00000000..bc40184d
--- /dev/null
+++ b/src/dynarec/rv64/rv64_mapping.h
@@ -0,0 +1,119 @@
+#ifndef __RV64_MAPPING_H__
+#define __RV64_MAPPING_H__
+
+// RV64 Register Mapping Scheme
+/*****************************************************************************************
+reg     name   mapping      native description              Box64 description       saver
+******************************************************************************************
+x0      zero   native zero  Hard-wired zero                 N/A                     —
+x1      ra     native ra    Return address                  N/A                     Caller
+x2      sp     native sp    Stack pointer                   N/A                     Callee
+x3      gp     native gp    Global pointer                  N/A                     —
+x4      tp     native tp    Thread pointer                  N/A                     —
+x5      t0     xMask        Temporary                       Always 0xFFFFFFFF       Caller
+x6      t1     x1           Temporary                       Scratch                 Caller
+x7      t2     x2           Temporary                       Scratch                 Caller
+x8      s0/fp  RBP          Saved register/frame pointer    -                       Callee
+x9      s1     RSP          Saved register                  -                       Callee
+x10     a0     RDI          Function argument/return val.   -                       Caller
+x11     a1     RSI          Function argument/return val.   -                       Caller
+x12     a2     RDX          Function argument               -                       Caller
+x13     a3     RCX          Function argument               -                       Caller
+x14     a4     R8           Function argument               -                       Caller
+x15     a5     R9           Function argument               -                       Caller
+x16     a6     RAX          Function argument               -                       Caller
+x17     a7     x7           Function argument               Scratch                 Caller
+x18     s2     R12          Saved register                  -                       Callee
+x19     s3     R13          Saved register                  -                       Callee
+x20     s4     R14          Saved register                  -                       Callee
+x21     s5     R15          Saved register                  -                       Callee
+x22     s6     RIP          Saved register                  -                       Callee
+x23     s7     FLAGS        Saved register                  -                       Callee
+x24     s8     RBX          Saved register                  -                       Callee
+x25     s9     xEmu         Saved register                  The Emu struct          Callee
+x26     s10    R10          Saved register                  -                       Callee
+x27     s11    R11          Saved register                  -                       Callee
+x28     t3     x3           Temporary                       Scratch                 Caller
+x29     t4     x4           Temporary                       Scratch                 Caller
+x30     t5     x5           Temporary                       Scratch                 Caller
+x31     t6     x6           Temporary                       Scratch                 Caller
+******************************************************************************************/
+
+#ifndef ASM_MAPPING
+
+#include <stdint.h>
+
+// x86 Register mapping
+#define xRAX   16
+#define xRCX   13
+#define xRDX   12
+#define xRBX   24
+#define xRSP   9
+#define xRBP   8
+#define xRSI   11
+#define xRDI   10
+#define xR8    14
+#define xR9    15
+#define xR10   26
+#define xR11   27
+#define xR12   18
+#define xR13   19
+#define xR14   20
+#define xR15   21
+#define xRIP   22
+#define xFlags 23
+
+// convert a x86 register to native according to the register mapping
+#define TO_NAT(A) (((uint8_t[]) { 16, 13, 12, 24, 9, 8, 11, 10, 14, 15, 26, 27, 18, 19, 20, 21 })[(A)])
+
+#define x1   6
+#define x2   7
+#define x3   28
+#define x4   29
+#define x5   30
+#define x6   31
+#define xEmu 25
+
+#define xMASK 5
+#define x7    17
+
+#define xRA 1
+#define xSP 2
+#define A0  10
+#define A1  11
+#define A2  12
+#define A3  13
+#define A4  14
+#define A5  15
+#define A6  16
+#define A7  17
+
+#define xZR 0
+
+#else
+
+// x86 Register mapping
+#define RAX   x16
+#define RCX   x13
+#define RDX   x12
+#define RBX   x24
+#define RSP   x9
+#define RBP   x8
+#define RSI   x11
+#define RDI   x10
+#define R8    x14
+#define R9    x15
+#define R10   x26
+#define R11   x27
+#define R12   x18
+#define R13   x19
+#define R14   x20
+#define R15   x21
+#define RIP   x22
+#define Flags x23
+#define Emu   x25
+#define MASK  x5
+
+#endif
+
+#endif // __RV64_MAPPING_H__
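
With this header, translating a guest GPR number to its native register becomes a table lookup rather than the old linear formula (TO_NAT(A) = xRAX + A in the removed rv64_emitter.h block), because guest registers are no longer contiguous in the native register file. A small standalone contrast, assuming box64's usual guest GPR ordering RAX..R15 = 0..15 (illustrative only):

    #include <stdint.h>
    #include <stdio.h>

    // Old mapping (rv64_emitter.h): guest GPRs were x16..x31, so TO_NAT was arithmetic.
    #define OLD_TO_NAT(A) (16 + (A))
    // New mapping (rv64_mapping.h): guest GPRs are scattered across ABI classes,
    // so TO_NAT is a lookup table.
    #define NEW_TO_NAT(A) (((uint8_t[]) { 16, 13, 12, 24, 9, 8, 11, 10, 14, 15, 26, 27, 18, 19, 20, 21 })[(A)])

    static const char* name[16] = { "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI",
                                    "R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15" };

    int main(void) {
        for (int i = 0; i < 16; ++i)
            printf("%-3s: x%-2d -> x%d\n", name[i], OLD_TO_NAT(i), NEW_TO_NAT(i));
        return 0;
    }
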
diff --git a/src/dynarec/rv64/rv64_next.S b/src/dynarec/rv64/rv64_next.S
index ce34bb7f..a631aac3 100644
--- a/src/dynarec/rv64/rv64_next.S
+++ b/src/dynarec/rv64/rv64_next.S
@@ -1,7 +1,6 @@
-//riscv update linker table for dynarec
-//called with pointer to emu as 1st parameter
-//and address of table to as 2nd parameter
-//ip is at r12
+#define ASM_MAPPING 1
+#include "rv64_mapping.h"
+#undef ASM_MAPPING
 
 .text
 .align 4
@@ -10,42 +9,56 @@
 
 .global rv64_next
 
-    .8byte  0   // NULL pointer before rv64_next, for getDB
+    // NULL pointer before rv64_next, for getDB
+    .8byte  0
+
+// rv64_next(void)
 rv64_next:
-    // emu is a0
-    // IP address is a1
+
+    // 16 bytes aligned
     addi    sp,  sp,  -(8 * 10)
-    sd      a0, (sp)
-    sd      a1, 8(sp)
-    sd      x5, 16(sp)
-    sd      x7, 24(sp)
-    sd      x16, 32(sp)
-    sd      x17, 40(sp)
-    sd      x28, 48(sp)
-    sd      x29, 56(sp)
-    sd      x30, 64(sp)
-    sd      x31, 72(sp)
-
-    mv      a2, ra      // "from" is in ra, so put in a2
-    addi    a3, sp, 24   // a3 is address to change rip
+
+    // push the regs we care about that might be destroyed
+    sd      RDI, (0*8)(sp)
+    sd      RSI, (1*8)(sp)
+    sd      RDX, (2*8)(sp)
+    sd      RCX, (3*8)(sp)
+    sd      R8,  (4*8)(sp)
+    sd      R9,  (5*8)(sp)
+    sd      RAX, (6*8)(sp)
+    sd      RIP, (8*8)(sp)
+
+    mv      a0, Emu
+    mv      a1, RIP
+#ifdef HAVE_TRACE
+    mv      a2, ra          // "from" is in ra, so put in a2
+#endif
+    addi    a3, sp, 8*8     // a3 is address to change rip
+
     // call the function
 1:
-    auipc   a4, %pcrel_hi(LinkNext)
-    jalr    a4, %pcrel_lo(1b)
-    // preserve return value
-    mv      a3, a0
+    auipc   t4, %pcrel_hi(LinkNext)
+    jalr    t4, %pcrel_lo(1b)
+
+    // preserve return value
+    mv      t3, a0
+
     // pop regs
-    ld      a0, (sp)
-    ld      a1, 8(sp)
-    ld      x5, 16(sp)
-    ld      x7, 24(sp)
-    ld      x16, 32(sp)
-    ld      x17, 40(sp)
-    ld      x28, 48(sp)
-    ld      x29, 56(sp)
-    ld      x30, 64(sp)
-    ld      x31, 72(sp)
+    ld      RDI, (0*8)(sp)
+    ld      RSI, (1*8)(sp)
+    ld      RDX, (2*8)(sp)
+    ld      RCX, (3*8)(sp)
+    ld      R8,  (4*8)(sp)
+    ld      R9,  (5*8)(sp)
+    ld      RAX, (6*8)(sp)
+    ld      RIP, (8*8)(sp)
+
     addi    sp,  sp,  (8 * 10)
+
+    // setup MASK
+    xori    MASK, zero, -1
+    srli    MASK, MASK, 32
+
     // return offset is jump address
-    jr      a3
+    jr      t3
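
Note that instead of saving and restoring x5 across the LinkNext call as the old code did, rv64_next now simply rebuilds the 32-bit mask afterwards, since xMASK always holds the same constant (documented as "Always 0xFFFFFFFF" in the mapping table). The two instructions compute, in C terms (a trivial sketch):

    #include <assert.h>
    #include <stdint.h>

    int main(void) {
        uint64_t mask = ~(uint64_t)0; // xori MASK, zero, -1
        mask >>= 32;                  // srli MASK, MASK, 32
        assert(mask == 0xFFFFFFFFull);
        return 0;
    }
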
 
diff --git a/src/dynarec/rv64/rv64_printer.c b/src/dynarec/rv64/rv64_printer.c
index 6f7af4d2..3dc619ab 100644
--- a/src/dynarec/rv64/rv64_printer.c
+++ b/src/dynarec/rv64/rv64_printer.c
@@ -23,68 +23,68 @@ static const char gpr[32][9] = {
     "sp",
     "gp",
     "tp",
-    "t0_mask",
+    "mask_t0",
     "t1",
-    "t2_rip",
-    "s0_flags",
-    "s1",
-    "a0",
-    "a1",
-    "a2",
-    "a3",
-    "a4",
-    "a5",
-    "a6_rax",
-    "a7_rcx",
-    "s2_rdx",
-    "s3_rbx",
-    "s4_rsp",
-    "s5_rbp",
-    "s6_rsi",
-    "s7_rdi",
-    "s8_r8",
-    "s9_r9",
-    "s10_r10",
-    "s11_r11",
-    "t3_r12",
-    "t4_r13",
-    "t5_r14",
-    "t6_r15",
+    "t2",
+    "rbp_s0",
+    "rsp_s1",
+    "rdi_a0",
+    "rsi_a1",
+    "rdx_a2",
+    "rcx_a3",
+    "r8_a4",
+    "r9_a5",
+    "rax_a6",
+    "x7_a7",
+    "r12_s2",
+    "r13_s3",
+    "r14_s4",
+    "r15_s5",
+    "rip_s6",
+    "flags_s7",
+    "rbx_s8",
+    "emu_s9",
+    "r10_s10",
+    "r11_s11",
+    "t3",
+    "t4",
+    "t5",
+    "t6",
 };
 
 static const char fpr[32][5] = {
-    "ft0",
-    "ft1",
-    "ft2",
-    "ft3",
-    "ft4",
-    "ft5",
-    "ft6",
-    "ft7",
-    "fs0",
-    "fs1",
-    "fa0",
-    "fa1",
-    "fa2",
-    "fa3",
-    "fa4",
-    "fa5",
-    "fa6",
-    "fa7",
-    "fs2",
-    "fs3",
-    "fs4",
-    "fs5",
-    "fs6",
-    "fs7",
-    "fs8",
-    "fs9",
-    "fs10",
-    "fs11",
-    "ft8",
-    "ft9",
-    "ft10",
-    "ft11",
+    "f0",
+    "f1",
+    "f2",
+    "f3",
+    "f4",
+    "f5",
+    "f6",
+    "f7",
+    "f8",
+    "f9",
+    "f10",
+    "f11",
+    "f12",
+    "f13",
+    "f14",
+    "f15",
+    "f16",
+    "f17",
+    "f18",
+    "f19",
+    "f20",
+    "f21",
+    "f22",
+    "f23",
+    "f24",
+    "f25",
+    "f26",
+    "f27",
+    "f28",
+    "f29",
+    "f30",
+    "f31",
 };
 
 static const char vpr[32][4] = {
diff --git a/src/dynarec/rv64/rv64_prolog.S b/src/dynarec/rv64/rv64_prolog.S
index 67af8253..7440b905 100644
--- a/src/dynarec/rv64/rv64_prolog.S
+++ b/src/dynarec/rv64/rv64_prolog.S
@@ -1,70 +1,86 @@
-//riscv prologue for dynarec
-//Save stuff, prepare stack and register
-//called with pointer to emu as 1st parameter
-//and address to jump to as 2nd parameter
+#define ASM_MAPPING 1
+#include "rv64_mapping.h"
+#undef ASM_MAPPING
 
 .text
 .align 4
 
 .global rv64_prolog
+
+// rv64_prolog(emu, jump_address)
 rv64_prolog:
-    //save all 18 used register
-    addi    sp,  sp, -(8 * 24)  // 16 bytes aligned
-    sd      ra, (sp)  // save ra
-    sd      x8, 8(sp) // save fp
-    sd      x18, (2*8)(sp)
-    sd      x19, (3*8)(sp)
-    sd      x20, (4*8)(sp)
-    sd      x21, (5*8)(sp)
-    sd      x22, (6*8)(sp)
-    sd      x23, (7*8)(sp)
-    sd      x24, (8*8)(sp)
-    sd      x25, (9*8)(sp)
-    sd      x26, (10*8)(sp)
-    sd      x27, (11*8)(sp)
-    sd      x9,  (12*8)(sp)
-    fsd     f18, (13*8)(sp)
-    fsd     f19, (14*8)(sp)
-    fsd     f20, (15*8)(sp)
-    fsd     f21, (16*8)(sp)
-    fsd     f22, (17*8)(sp)
-    fsd     f23, (18*8)(sp)
-    fsd     f24, (19*8)(sp)
-    fsd     f25, (20*8)(sp)
-    fsd     f26, (21*8)(sp)
-    fsd     f27, (22*8)(sp)
-    //setup emu -> register
-    ld      x16, (a0)
-    ld      x17, 8(a0)
-    ld      x18, 16(a0)
-    ld      x19, 24(a0)
-    ld      x20, 32(a0)
-    ld      x21, 40(a0)
-    ld      x22, 48(a0)
-    ld      x23, 56(a0)
-    ld      x24, 64(a0)
-    ld      x25, 72(a0)
-    ld      x26, 80(a0)
-    ld      x27, 88(a0)
-    ld      x28, 96(a0)
-    ld      x29, 104(a0)
-    ld      x30, 112(a0)
-    ld      x31, 120(a0)
-    ld      x8, 128(a0)     //xFlags
-    ld      x7, 136(a0)     // xRIP
-    // // adjust flags bit 11 -> bit 5
-    andi    x8, x8, ~(1<<5)    // probably not usefull?
-    srli    x5, x8, 11-5
-    andi    x5, x5, 1<<5
-    or      x8, x8, x5
-    ld      x5, 808(a0) // grab an old value of emu->xSPSave
-    sd      sp, 808(a0) // save current sp to emu->xSPSave
+
+    // 16 bytes aligned
+    addi    sp,   sp, -(8 * 24)
+
+    // save callee-saved registers
+    sd      ra,   (0*8)(sp)
+    sd      fp,   (1*8)(sp)
+    sd      s1,   (2*8)(sp)
+    sd      s2,   (3*8)(sp)
+    sd      s3,   (4*8)(sp)
+    sd      s4,   (5*8)(sp)
+    sd      s5,   (6*8)(sp)
+    sd      s6,   (7*8)(sp)
+    sd      s7,   (8*8)(sp)
+    sd      s8,   (9*8)(sp)
+    sd      s9,   (10*8)(sp)
+    sd      s10,  (11*8)(sp)
+    sd      s11,  (12*8)(sp)
+    fsd     fs2,  (13*8)(sp)
+    fsd     fs3,  (14*8)(sp)
+    fsd     fs4,  (15*8)(sp)
+    fsd     fs5,  (16*8)(sp)
+    fsd     fs6,  (17*8)(sp)
+    fsd     fs7,  (18*8)(sp)
+    fsd     fs8,  (19*8)(sp)
+    fsd     fs9,  (20*8)(sp)
+    fsd     fs10, (21*8)(sp)
+    fsd     fs11, (22*8)(sp)
+
+    // save a1
+    mv      t6,  a1
+
+    /*** switch to box64 register naming convention ***/
+
+    // load x86 registers from emu
+    mv      Emu, a0
+    ld      RAX,   0(Emu)
+    ld      RCX,   8(Emu)
+    ld      RDX,   16(Emu)
+    ld      RBX,   24(Emu)
+    ld      RSP,   32(Emu)
+    ld      RBP,   40(Emu)
+    ld      RSI,   48(Emu)
+    ld      RDI,   56(Emu)
+    ld      R8,    64(Emu)
+    ld      R9,    72(Emu)
+    ld      R10,   80(Emu)
+    ld      R11,   88(Emu)
+    ld      R12,   96(Emu)
+    ld      R13,   104(Emu)
+    ld      R14,   112(Emu)
+    ld      R15,   120(Emu)
+    ld      Flags, 128(Emu)
+    ld      RIP,   136(Emu)
+
+    // adjust flags bit 11 -> bit 5
+    andi    Flags, Flags, ~(1<<5)   // probably not useful?
+    srli    t3, Flags, 11-5
+    andi    t3, t3, 1<<5
+    or      Flags, Flags, t3
+
+    ld      t3, 808(Emu) // grab an old value of emu->xSPSave
+    sd      sp, 808(Emu) // save current sp to emu->xSPSave
+
     // push sentinel onto the stack
-    sd      x5, -16(sp)
+    sd      t3, -16(sp)
     sd      zero, -8(sp)
     addi    sp, sp, -16
-    // setup xMASK
-    xori    x5, x0, -1
-    srli    x5, x5, 32
+
+    // setup MASK
+    xori    MASK, zero, -1
+    srli    MASK, MASK, 32
+
     // jump to block
-    jr    a1
+    jr      t6
diff --git a/src/libtools/signal32.c b/src/libtools/signal32.c
index 6dace456..3742d0c7 100644
--- a/src/libtools/signal32.c
+++ b/src/libtools/signal32.c
@@ -487,7 +487,7 @@ void my_sigactionhandler_oldcode_32(int32_t sig, int simple, siginfo_t* info, vo
     if(p) {
         pc = (void*)p->uc_mcontext.__gregs[0];
         if(db)
-            frame = (uintptr_t)p->uc_mcontext.__gregs[16+_SP];
+            frame = (uintptr_t)p->uc_mcontext.__gregs[9];
     }
 #else
 #error Unsupported architecture
@@ -573,13 +573,13 @@ void my_sigactionhandler_oldcode_32(int32_t sig, int simple, siginfo_t* info, vo
 #elif defined(RV64)
     if(db && p) {
         sigcontext->uc_mcontext.gregs[I386_EAX] = p->uc_mcontext.__gregs[16];
-        sigcontext->uc_mcontext.gregs[I386_ECX] = p->uc_mcontext.__gregs[17];
-        sigcontext->uc_mcontext.gregs[I386_EDX] = p->uc_mcontext.__gregs[18];
-        sigcontext->uc_mcontext.gregs[I386_EBX] = p->uc_mcontext.__gregs[19];
-        sigcontext->uc_mcontext.gregs[I386_ESP] = p->uc_mcontext.__gregs[20];
-        sigcontext->uc_mcontext.gregs[I386_EBP] = p->uc_mcontext.__gregs[21];
-        sigcontext->uc_mcontext.gregs[I386_ESI] = p->uc_mcontext.__gregs[22];
-        sigcontext->uc_mcontext.gregs[I386_EDI] = p->uc_mcontext.__gregs[23];
+        sigcontext->uc_mcontext.gregs[I386_ECX] = p->uc_mcontext.__gregs[13];
+        sigcontext->uc_mcontext.gregs[I386_EDX] = p->uc_mcontext.__gregs[12];
+        sigcontext->uc_mcontext.gregs[I386_EBX] = p->uc_mcontext.__gregs[24];
+        sigcontext->uc_mcontext.gregs[I386_ESP] = p->uc_mcontext.__gregs[9];
+        sigcontext->uc_mcontext.gregs[I386_EBP] = p->uc_mcontext.__gregs[8];
+        sigcontext->uc_mcontext.gregs[I386_ESI] = p->uc_mcontext.__gregs[11];
+        sigcontext->uc_mcontext.gregs[I386_EDI] = p->uc_mcontext.__gregs[10];
         sigcontext->uc_mcontext.gregs[I386_EIP] = getX64Address(db, (uintptr_t)pc);
     }
 #else
diff --git a/src/libtools/signals.c b/src/libtools/signals.c
index db4a5709..290452a2 100644
--- a/src/libtools/signals.c
+++ b/src/libtools/signals.c
@@ -501,8 +501,8 @@ x64emu_t* getEmuSignal(x64emu_t* emu, ucontext_t* p, dynablock_t* db)
             emu = (x64emu_t*)p->uc_mcontext.__gregs[4];
         }
 #elif defined(RV64)
-        if(db && p->uc_mcontext.__gregs[10]>0x10000) {
-            emu = (x64emu_t*)p->uc_mcontext.__gregs[10];
+        if(db && p->uc_mcontext.__gregs[25]>0x10000) {
+            emu = (x64emu_t*)p->uc_mcontext.__gregs[25];
         }
 #else
 #error Unsupported Architecture
@@ -604,23 +604,23 @@ void copyUCTXreg2Emu(x64emu_t* emu, ucontext_t* p, uintptr_t ip) {
         emu->eflags.x64 = p->uc_mcontext.__gregs[31];
 #elif defined(RV64)
         emu->regs[_AX].q[0] = p->uc_mcontext.__gregs[16];
-        emu->regs[_CX].q[0] = p->uc_mcontext.__gregs[17];
-        emu->regs[_DX].q[0] = p->uc_mcontext.__gregs[18];
-        emu->regs[_BX].q[0] = p->uc_mcontext.__gregs[19];
-        emu->regs[_SP].q[0] = p->uc_mcontext.__gregs[20];
-        emu->regs[_BP].q[0] = p->uc_mcontext.__gregs[21];
-        emu->regs[_SI].q[0] = p->uc_mcontext.__gregs[22];
-        emu->regs[_DI].q[0] = p->uc_mcontext.__gregs[23];
-        emu->regs[_R8].q[0] = p->uc_mcontext.__gregs[24];
-        emu->regs[_R9].q[0] = p->uc_mcontext.__gregs[25];
+        emu->regs[_CX].q[0] = p->uc_mcontext.__gregs[13];
+        emu->regs[_DX].q[0] = p->uc_mcontext.__gregs[12];
+        emu->regs[_BX].q[0] = p->uc_mcontext.__gregs[24];
+        emu->regs[_SP].q[0] = p->uc_mcontext.__gregs[9];
+        emu->regs[_BP].q[0] = p->uc_mcontext.__gregs[8];
+        emu->regs[_SI].q[0] = p->uc_mcontext.__gregs[11];
+        emu->regs[_DI].q[0] = p->uc_mcontext.__gregs[10];
+        emu->regs[_R8].q[0] = p->uc_mcontext.__gregs[14];
+        emu->regs[_R9].q[0] = p->uc_mcontext.__gregs[15];
         emu->regs[_R10].q[0] = p->uc_mcontext.__gregs[26];
         emu->regs[_R11].q[0] = p->uc_mcontext.__gregs[27];
-        emu->regs[_R12].q[0] = p->uc_mcontext.__gregs[28];
-        emu->regs[_R13].q[0] = p->uc_mcontext.__gregs[29];
-        emu->regs[_R14].q[0] = p->uc_mcontext.__gregs[30];
-        emu->regs[_R15].q[0] = p->uc_mcontext.__gregs[31];
-        emu->ip.q[0] = ip;
-        emu->eflags.x64 = p->uc_mcontext.__gregs[8];
+        emu->regs[_R12].q[0] = p->uc_mcontext.__gregs[18];
+        emu->regs[_R13].q[0] = p->uc_mcontext.__gregs[19];
+        emu->regs[_R14].q[0] = p->uc_mcontext.__gregs[20];
+        emu->regs[_R15].q[0] = p->uc_mcontext.__gregs[21];
+        emu->ip.q[0] = ip;
+        emu->eflags.x64 = p->uc_mcontext.__gregs[23];
 #else
 #error  Unsupported architecture
 #endif
@@ -972,7 +972,7 @@ void my_sigactionhandler_oldcode(x64emu_t* emu, int32_t sig, int simple, siginfo
     if(p) {
         pc = (void*)p->uc_mcontext.__gregs[0];
         if(db)
-            frame = (uintptr_t)p->uc_mcontext.__gregs[16+_SP];
+            frame = (uintptr_t)p->uc_mcontext.__gregs[9];
     }
 #else
 #error Unsupported architecture
@@ -1075,21 +1075,21 @@ void my_sigactionhandler_oldcode(x64emu_t* emu, int32_t sig, int simple, siginfo
 #elif defined(RV64)
     if(db && p) {
         sigcontext->uc_mcontext.gregs[X64_RAX] = p->uc_mcontext.__gregs[16];
-        sigcontext->uc_mcontext.gregs[X64_RCX] = p->uc_mcontext.__gregs[17];
-        sigcontext->uc_mcontext.gregs[X64_RDX] = p->uc_mcontext.__gregs[18];
-        sigcontext->uc_mcontext.gregs[X64_RBX] = p->uc_mcontext.__gregs[19];
-        sigcontext->uc_mcontext.gregs[X64_RSP] = p->uc_mcontext.__gregs[20];
-        sigcontext->uc_mcontext.gregs[X64_RBP] = p->uc_mcontext.__gregs[21];
-        sigcontext->uc_mcontext.gregs[X64_RSI] = p->uc_mcontext.__gregs[22];
-        sigcontext->uc_mcontext.gregs[X64_RDI] = p->uc_mcontext.__gregs[23];
-        sigcontext->uc_mcontext.gregs[X64_R8] = p->uc_mcontext.__gregs[24];
-        sigcontext->uc_mcontext.gregs[X64_R9] = p->uc_mcontext.__gregs[25];
+        sigcontext->uc_mcontext.gregs[X64_RCX] = p->uc_mcontext.__gregs[13];
+        sigcontext->uc_mcontext.gregs[X64_RDX] = p->uc_mcontext.__gregs[12];
+        sigcontext->uc_mcontext.gregs[X64_RBX] = p->uc_mcontext.__gregs[24];
+        sigcontext->uc_mcontext.gregs[X64_RSP] = p->uc_mcontext.__gregs[9];
+        sigcontext->uc_mcontext.gregs[X64_RBP] = p->uc_mcontext.__gregs[8];
+        sigcontext->uc_mcontext.gregs[X64_RSI] = p->uc_mcontext.__gregs[11];
+        sigcontext->uc_mcontext.gregs[X64_RDI] = p->uc_mcontext.__gregs[10];
+        sigcontext->uc_mcontext.gregs[X64_R8] = p->uc_mcontext.__gregs[14];
+        sigcontext->uc_mcontext.gregs[X64_R9] = p->uc_mcontext.__gregs[15];
         sigcontext->uc_mcontext.gregs[X64_R10] = p->uc_mcontext.__gregs[26];
         sigcontext->uc_mcontext.gregs[X64_R11] = p->uc_mcontext.__gregs[27];
-        sigcontext->uc_mcontext.gregs[X64_R12] = p->uc_mcontext.__gregs[28];
-        sigcontext->uc_mcontext.gregs[X64_R13] = p->uc_mcontext.__gregs[29];
-        sigcontext->uc_mcontext.gregs[X64_R14] = p->uc_mcontext.__gregs[30];
-        sigcontext->uc_mcontext.gregs[X64_R15] = p->uc_mcontext.__gregs[31];
+        sigcontext->uc_mcontext.gregs[X64_R12] = p->uc_mcontext.__gregs[18];
+        sigcontext->uc_mcontext.gregs[X64_R13] = p->uc_mcontext.__gregs[19];
+        sigcontext->uc_mcontext.gregs[X64_R14] = p->uc_mcontext.__gregs[20];
+        sigcontext->uc_mcontext.gregs[X64_R15] = p->uc_mcontext.__gregs[21];
         sigcontext->uc_mcontext.gregs[X64_RIP] = getX64Address(db, (uintptr_t)pc);
     }
 #else
@@ -1676,12 +1676,12 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for
             rsp = (void*)p->uc_mcontext.__gregs[12+_SP];
         }
 #elif defined(RV64)
-        if(db && p->uc_mcontext.__gregs[10]>0x10000) {
-            emu = (x64emu_t*)p->uc_mcontext.__gregs[10];
+        if(db && p->uc_mcontext.__gregs[25]>0x10000) {
+            emu = (x64emu_t*)p->uc_mcontext.__gregs[25];
         }
         if(db) {
             x64pc = getX64Address(db, (uintptr_t)pc);
-            rsp = (void*)p->uc_mcontext.__gregs[16+_SP];
+            rsp = (void*)p->uc_mcontext.__gregs[9];
         }
 #else
 #error Unsupported Architecture
@@ -1837,7 +1837,7 @@ dynarec_log(/*LOG_DEBUG*/LOG_INFO, "Repeated SIGSEGV with Access error on %p for
                 shown_regs = 1;
                 for (int i=0; i<16; ++i) {
                     if(!(i%4)) printf_log(log_minimum, "\n");
-                    printf_log(log_minimum, "%s:0x%016llx ", reg_name[i], p->uc_mcontext.__gregs[16+i]);
+                    printf_log(log_minimum, "%s:0x%016llx ", reg_name[i], p->uc_mcontext.__gregs[(((uint8_t[]) { 16, 13, 12, 24, 9, 8, 11, 10, 14, 15, 26, 27, 18, 19, 20, 21 })[i])]);
                 }
                 printf_log(log_minimum, "\n");
                 for (int i=0; i<6; ++i)