about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2023-06-26 02:32:24 +0800
committer GitHub <noreply@github.com> 2023-06-25 20:32:24 +0200
commit 42dc36ad5af78da0e7d255df7662e9148cdfdde2 (patch)
tree 78d639acfd537c7fe0afe613be61bf61d274f7b9
parent 7689d0d568eb2425c9426314a1ee5a18b8c01a74 (diff)
download box64-42dc36ad5af78da0e7d255df7662e9148cdfdde2.tar.gz
box64-42dc36ad5af78da0e7d255df7662e9148cdfdde2.zip
[32BTIS][DYNAREC_RV64] Added support for 32bits (#861)
* [32BTIS][DYNAREC_RV64] Added support for 32bits

* Fixed 32bit INC/DEC

* Fixed a typo

* Some more fixes

* Fixed geted_32

* POP -> POP1, PUSH -> PUSH1
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_64.c 24
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_66.c 18
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_6664.c 8
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_66f0.c 15
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_67.c 79
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_f0.c 45
-rwxr-xr-x src/dynarec/arm64/dynarec_arm64_helper.h 14
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00.c 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_1.c 114
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_2.c 58
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_3.c 57
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_64.c 37
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_66.c 50
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_6664.c 7
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_66f0.c 8
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_67.c 12
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_f0.c 22
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_functions.c 16
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_functions.h 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.c 183
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 21
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_pass3.h 6
-rw-r--r-- src/dynarec/rv64/rv64_emitter.h 26
23 files changed, 545 insertions, 283 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index 723b4460..540ab50e 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -54,13 +54,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         rep = opcode-0xF1;
         opcode = F8;
     }
-    // REX prefix before the F0 are ignored
-    rex.rex = 0;
-    if(!rex.is32bits)
-        while(opcode>=0x40 && opcode<=0x4f) {
-            rex.rex = opcode;
-            opcode = F8;
-        }
+    GETREX();
 
     switch(opcode) {
 
@@ -293,7 +287,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GETEDO(x4, 0);
             emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
             break;
-                    
+
         case 0x39:
             INST_NAME("CMP Seg:Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -317,7 +311,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0x63:
             if(rex.is32bits) {
                 // ARPL here
-                DEFAULT;                
+                DEFAULT;
             } else {
                 INST_NAME("MOVSXD Gd, Ed");
                 nextop = F8;
@@ -646,7 +640,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     eb2 = (ed&4)>>2;    // L or H
                 } else {
                     eb1 = xRAX+(nextop&7)+(rex.b<<3);
-                    eb2 = 0;            
+                    eb2 = 0;
                 }
                 MOV32w(x3, u8);
                 BFIx(eb1, x3, eb2*8, 8);
@@ -878,7 +872,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
             }
             break;
-            
+
         case 0xF7:
             nextop = F8;
             grab_segdata(dyn, addr, ninst, x6, seg);
@@ -959,8 +953,8 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         MOVw_REG(xRDX, x4);
                     } else {
                         if(ninst
-                           && dyn->insts[ninst-1].x64.addr 
-                           && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 
+                           && dyn->insts[ninst-1].x64.addr
+                           && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31
                            && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) {
                             SET_DFNONE(x2);
                             GETEDO(x6, 0);
@@ -996,7 +990,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         MOVw_REG(xRDX, x4);
                     } else {
                         if(ninst && dyn->insts
-                           &&  dyn->insts[ninst-1].x64.addr 
+                           &&  dyn->insts[ninst-1].x64.addr
                            && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48
                            && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) {
                             SET_DFNONE(x2)
@@ -1022,7 +1016,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
             }
             break;
-            
+
         case 0xFF:
             nextop = F8;
             grab_segdata(dyn, addr, ninst, x6, seg);
diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index c6d8420a..f2f0a0a1 100755
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -49,13 +49,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         rep = opcode-0xF1;

         opcode = F8;

     }

-    // REX prefix before the 66 are ignored

-    rex.rex = 0;

-    if(!rex.is32bits)

-        while(opcode>=0x40 && opcode<=0x4f) {

-            rex.rex = opcode;

-            opcode = F8;

-        }

+    GETREX();

 

     if(rex.w && !(opcode==0x0f || opcode==0xf0 || opcode==0x64 || opcode==0x65))   // rex.w cancels "66", but not for 66 0f type of prefix

         return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog); // addr-1, to "put back" opcode

@@ -88,7 +82,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             emit_add16(dyn, ninst, x1, x2, x3, x4);

             BFIx(xRAX, x1, 0, 16);

             break;

-                

+

         case 0x09:

             INST_NAME("OR Ew, Gw");

             SETFLAGS(X_ALL, SF_SET_PENDING);

@@ -456,7 +450,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;

             }

             break;

-            

+

         case 0x85:

             INST_NAME("TEST Ew, Gw");

             SETFLAGS(X_ALL, SF_SET_PENDING);

@@ -494,7 +488,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 BFIx(gd, x1, 0, 16);

             }

             break;

-            

+

         case 0x89:

             INST_NAME("MOV Ew, Gw");

             nextop = F8;

@@ -973,7 +967,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;

             }

             break;

-            

+

         case 0xF0:

             return dynarec64_66F0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog);

 

@@ -1049,7 +1043,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;

             }

             break;

-            

+

         case 0xFF:

             nextop = F8;

             switch((nextop>>3)&7) {

diff --git a/src/dynarec/arm64/dynarec_arm64_6664.c b/src/dynarec/arm64/dynarec_arm64_6664.c
index 920bb3e7..0fe59473 100644
--- a/src/dynarec/arm64/dynarec_arm64_6664.c
+++ b/src/dynarec/arm64/dynarec_arm64_6664.c
@@ -34,13 +34,7 @@ uintptr_t dynarec64_6664(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
     int unscaled;
     MAYUSE(j64);
 
-    // REX prefix before the 66 are ignored
-    rex.rex = 0;
-    if(!rex.is32bits)
-        while(opcode>=0x40 && opcode<=0x4f) {
-            rex.rex = opcode;
-            opcode = F8;
-        }
+    GETREX();
 
     /*if(rex.w && opcode!=0x0f) {   // rex.w cancels "66", but not for 66 0f type of prefix
         MESSAGE(LOG_DUMP, "Here!\n");
diff --git a/src/dynarec/arm64/dynarec_arm64_66f0.c b/src/dynarec/arm64/dynarec_arm64_66f0.c
index be92a709..3f606799 100644
--- a/src/dynarec/arm64/dynarec_arm64_66f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_66f0.c
@@ -44,13 +44,8 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         rep = opcode-0xF1;
         opcode = F8;
     }
-    // REX prefix before the F0/66 are ignored
-    rex.rex = 0;
-    if(!rex.is32bits)
-        while(opcode>=0x40 && opcode<=0x4f) {
-            rex.rex = opcode;
-            opcode = F8;
-        }
+
+    GETREX();
 
     switch(opcode) {
         case 0x09:
@@ -124,7 +119,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     BFIx(xRAX, x1, 0, 16);
                     SMDMB();
                     break;
-                    
+
                 case 0xC1:
                     INST_NAME("LOCK XADD Gw, Ew");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -189,7 +184,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         STLXRH(x3, x1, wback);
                         CBNZx_MARKLOCK(x3);
                         B_NEXT_nocond;
-                        MARK;   // unaligned! also, not enough 
+                        MARK;   // unaligned! also, not enough
                         LDRH_U12(x1, wback, 0);
                         LDAXRB(x4, wback);
                         BFIw(x1, x4, 0, 8); // re-inject
@@ -307,7 +302,7 @@ uintptr_t dynarec64_66F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                         STLXRH(x3, x1, wback);
                         CBNZx_MARKLOCK(x3);
                         B_NEXT_nocond;
-                        MARK;   // unaligned! also, not enough 
+                        MARK;   // unaligned! also, not enough
                         LDRH_U12(x1, wback, 0);
                         LDAXRB(x4, wback);
                         BFIw(x1, x4, 0, 8); // re-inject
diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index cfe50d20..24b87d9b 100755
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -53,13 +53,8 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         return addr;

     }

 

+    GETREX();

 

-    // REX prefix before the 67 are ignored

-    rex.rex = 0;

-    while(opcode>=0x40 && opcode<=0x4f) {

-        rex.rex = opcode;

-        opcode = F8;

-    }

     rep = 0;

     while((opcode==0xF2) || (opcode==0xF3)) {

         rep = opcode-0xF1;

@@ -238,39 +233,39 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }

                     break;

 

-                    case 0xB6:

-                        INST_NAME("MOVZX Gd, Eb");

-                        nextop = F8;

-                        GETGD;

-                        if(MODREG) {

-                            if(rex.rex) {

-                                eb1 = xRAX+(nextop&7)+(rex.b<<3);

-                                eb2 = 0;                \

-                            } else {

-                                ed = (nextop&7);

-                                eb1 = xRAX+(ed&3);  // Ax, Cx, Dx or Bx

-                                eb2 = (ed&4)>>2;    // L or H

-                            }

-                            UBFXxw(gd, eb1, eb2*8, 8);

-                        } else {

-                            SMREAD();

-                            addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff, 0, rex, NULL, 0, 0);

-                            LDB(gd, ed, fixedaddress);

-                        }

-                        break;

-                    case 0xB7:

-                        INST_NAME("MOVZX Gd, Ew");

-                        nextop = F8;

-                        GETGD;

-                        if(MODREG) {

-                            ed = xRAX+(nextop&7)+(rex.b<<3);

-                            UBFXxw(gd, ed, 0, 16);

+                case 0xB6:

+                    INST_NAME("MOVZX Gd, Eb");

+                    nextop = F8;

+                    GETGD;

+                    if(MODREG) {

+                        if(rex.rex) {

+                            eb1 = xRAX+(nextop&7)+(rex.b<<3);

+                            eb2 = 0;                \

                         } else {

-                            SMREAD();

-                            addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);

-                            LDH(gd, ed, fixedaddress);

+                            ed = (nextop&7);

+                            eb1 = xRAX+(ed&3);  // Ax, Cx, Dx or Bx

+                            eb2 = (ed&4)>>2;    // L or H

                         }

-                        break;

+                        UBFXxw(gd, eb1, eb2*8, 8);

+                    } else {

+                        SMREAD();

+                        addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff, 0, rex, NULL, 0, 0);

+                        LDB(gd, ed, fixedaddress);

+                    }

+                    break;

+                case 0xB7:

+                    INST_NAME("MOVZX Gd, Ew");

+                    nextop = F8;

+                    GETGD;

+                    if(MODREG) {

+                        ed = xRAX+(nextop&7)+(rex.b<<3);

+                        UBFXxw(gd, ed, 0, 16);

+                    } else {

+                        SMREAD();

+                        addr = geted32(dyn, addr, ninst, nextop, &ed, x2, &fixedaddress, &unscaled, 0xfff<<1, 1, rex, NULL, 0, 0);

+                        LDH(gd, ed, fixedaddress);

+                    }

+                    break;

 

                 default:

                     DEFAULT;

@@ -721,7 +716,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;

             }

             break;

-            

+

         case 0x88:

             INST_NAME("MOV Eb, Gb");

             nextop = F8;

@@ -1027,9 +1022,9 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         MOVw_REG(xRAX, x2);

                         MOVw_REG(xRDX, x4);

                     } else {

-                        if(ninst && dyn->insts 

-                           && dyn->insts[ninst-1].x64.addr 

-                           && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31 

+                        if(ninst && dyn->insts

+                           && dyn->insts[ninst-1].x64.addr

+                           && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x31

                            && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0xD2) {

                             SET_DFNONE(x2);

                             GETED32(0);

@@ -1065,7 +1060,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         MOVw_REG(xRDX, x4);

                     } else {

                         if(ninst && dyn->insts

-                           &&  dyn->insts[ninst-1].x64.addr 

+                           &&  dyn->insts[ninst-1].x64.addr

                            && *(uint8_t*)(dyn->insts[ninst-1].x64.addr)==0x48

                            && *(uint8_t*)(dyn->insts[ninst-1].x64.addr+1)==0x99) {

                             SET_DFNONE(x2)

diff --git a/src/dynarec/arm64/dynarec_arm64_f0.c b/src/dynarec/arm64/dynarec_arm64_f0.c
index 0d1c7391..4a1421cc 100644
--- a/src/dynarec/arm64/dynarec_arm64_f0.c
+++ b/src/dynarec/arm64/dynarec_arm64_f0.c
@@ -46,13 +46,8 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         rep = opcode-0xF1;
         opcode = F8;
     }
-    // REX prefix before the F0 are ignored
-    rex.rex = 0;
-    if(!rex.is32bits)
-        while(opcode>=0x40 && opcode<=0x4f) {
-            rex.rex = opcode;
-            opcode = F8;
-        }
+
+    GETREX();
 
     switch(opcode) {
         case 0x00:
@@ -66,14 +61,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     wback = xRAX + (nextop&7) + (rex.b<<3);
                     wb2 = 0;
                 } else {
-                    wback = (nextop&7);    
-                    wb2 = (wback>>2);      
+                    wback = (nextop&7);
+                    wb2 = (wback>>2);
                     wback = xRAX+(wback&3);
                 }
-                UBFXw(x1, wback, wb2*8, 8);   
+                UBFXw(x1, wback, wb2*8, 8);
                 emit_add8(dyn, ninst, x1, x2, x4, x3);
                 BFIx(wback, x1, wb2*8, 8);
-            } else {                   
+            } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                 MARKLOCK;
                 LDAXRB(x1, wback);
@@ -114,14 +109,14 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     wback = xRAX + (nextop&7) + (rex.b<<3);
                     wb2 = 0;
                 } else {
-                    wback = (nextop&7);    
-                    wb2 = (wback>>2);      
+                    wback = (nextop&7);
+                    wb2 = (wback>>2);
                     wback = xRAX+(wback&3);
                 }
-                UBFXw(x1, wback, wb2*8, 8);   
+                UBFXw(x1, wback, wb2*8, 8);
                 emit_or8(dyn, ninst, x1, x2, x4, x3);
                 BFIx(wback, x1, wb2*8, 8);
-            } else {                   
+            } else {
                 addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, NULL, 0, 0, rex, LOCK_LOCK, 0, 0);
                 MARKLOCK;
                 LDAXRB(x1, wback);
@@ -220,11 +215,11 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 if(rex.rex) {
                                     wback = xRAX+(nextop&7)+(rex.b<<3);
                                     wb2 = 0;
-                                } else { 
+                                } else {
                                     wback = (nextop&7);
                                     wb2 = (wback>>2)*8;
                                     wback = xRAX+(wback&3);
-                                } 
+                                }
                                 UBFXx(x2, wback, wb2, 8);
                                 wb1 = 0;
                                 ed = x2;
@@ -445,7 +440,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     DEFAULT;
             }
             break;
-                    
+
         case 0x21:
             INST_NAME("LOCK AND Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -465,7 +460,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             SMDMB();
             break;
-            
+
         case 0x29:
             INST_NAME("LOCK SUB Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -681,7 +676,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         CBNZx_MARKLOCK(x3);
                         SMDMB();
                         B_NEXT_nocond;
-                        MARK;   // unaligned! also, not enough 
+                        MARK;   // unaligned! also, not enough
                         LDRxw_U12(x1, wback, 0);
                         LDAXRB(x4, wback);
                         BFIxw(x1, x4, 0, 8); // re-inject
@@ -789,7 +784,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                         CBNZx_MARKLOCK(x3);
                         SMDMB();
                         B_NEXT_nocond;
-                        MARK;   // unaligned! also, not enough 
+                        MARK;   // unaligned! also, not enough
                         LDRxw_U12(x1, wback, 0);
                         LDAXRB(x4, wback);
                         BFIxw(x1, x4, 0, 8); // re-inject
@@ -834,7 +829,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }
             SMDMB();
             break;
-            
+
         case 0x86:
             INST_NAME("LOCK XCHG Eb, Gb");
             // Do the swap
@@ -896,7 +891,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 MOVxw_REG(gd, x1);
             }
             break;
-            
+
         case 0xF6:
             nextop = F8;
             switch((nextop>>3)&7) {
@@ -931,7 +926,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     DEFAULT;
             }
             break;
-        
+
         case 0xFE:
             nextop = F8;
             switch((nextop>>3)&7)
@@ -1042,7 +1037,7 @@ uintptr_t dynarec64_F0(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     DEFAULT;
             }
             break;
-            
+
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index 8002b718..9c3594ac 100755
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -784,10 +784,10 @@
     } else dyn->f.pending = SF_SET
 #endif
 #ifndef JUMP
-#define JUMP(A, C) 
+#define JUMP(A, C)
 #endif
 #ifndef BARRIER
-#define BARRIER(A) 
+#define BARRIER(A)
 #endif
 #ifndef BARRIER_NEXT
 #define BARRIER_NEXT(A)
@@ -1238,7 +1238,7 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
 #if STEP < 3
 #define MAYUSE(A)   (void)A
 #else
-#define MAYUSE(A)   
+#define MAYUSE(A)
 #endif
 
 #define GOCOND(B, T1, T2)                                   \
@@ -1346,4 +1346,12 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
         STRw_U12(s2, xEmu, offsetof(x64emu_t, test.test));  \
     }
 
+#define GETREX()                                \
+    rex.rex = 0;                                \
+    if(!rex.is32bits)                           \
+        while(opcode>=0x40 && opcode<=0x4f) {   \
+            rex.rex = opcode;                   \
+            opcode = F8;                        \
+        }
+
 #endif //__DYNAREC_ARM64_HELPER_H__
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index c7704e56..684aa490 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -31,12 +31,6 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 {
     uint8_t opcode;
 
-
-    if(rex.is32bits) {
-        DEFAULT;
-        return ip;
-    }
-
     opcode = PK(0);
     switch(opcode) {
         case 0x00 ... 0x3f: addr = dynarec64_00_0(dyn, addr, ip, ninst, rex, rep, ok, need_epilog); break;
diff --git a/src/dynarec/rv64/dynarec_rv64_00_1.c b/src/dynarec/rv64/dynarec_rv64_00_1.c
index 03e15cd2..54ca28f5 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_1.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_1.c
@@ -52,7 +52,32 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
     MAYUSE(cacheupd);
 
     switch(opcode) {
-
+        case 0x40:
+        case 0x41:
+        case 0x42:
+        case 0x43:
+        case 0x44:
+        case 0x45:
+        case 0x46:
+        case 0x47:
+            INST_NAME("INC Reg (32bits)");
+            SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
+            gd = xRAX + (opcode&7);
+            emit_inc32(dyn, ninst, rex, gd, x1, x2, x3, x4);
+            break;
+        case 0x48:
+        case 0x49:
+        case 0x4A:
+        case 0x4B:
+        case 0x4C:
+        case 0x4D:
+        case 0x4E:
+        case 0x4F:
+            INST_NAME("DEC Reg (32bits)");
+            SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
+            gd = xRAX + (opcode&7);
+            emit_dec32(dyn, ninst, rex, gd, x1, x2, x3, x4);
+            break;
         case 0x50:
         case 0x51:
         case 0x52:
@@ -63,8 +88,7 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x57:
             INST_NAME("PUSH reg");
             gd = xRAX+(opcode&0x07)+(rex.b<<3);
-            SD(gd, xRSP, -8);
-            SUBI(xRSP, xRSP, 8);
+            PUSH1z(gd);
             break;
         case 0x58:
         case 0x59:
@@ -76,31 +100,65 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x5F:
             INST_NAME("POP reg");
             gd = xRAX+(opcode&0x07)+(rex.b<<3);
-            LD(gd, xRSP, 0);
-            if(gd!=xRSP) {
-                ADDI(xRSP, xRSP, 8);
+            POP1z(gd);
+            break;
+
+        case 0x60:
+            if(rex.is32bits) {
+                INST_NAME("PUSHAD");
+                AND(x1, xRSP, xMASK);
+                PUSH1_32(xRAX);
+                PUSH1_32(xRCX);
+                PUSH1_32(xRDX);
+                PUSH1_32(xRBX);
+                PUSH1_32(x1);
+                PUSH1_32(xRBP);
+                PUSH1_32(xRSI);
+                PUSH1_32(xRDI);
+            } else {
+                DEFAULT;
+            }
+            break;
+        case 0x61:
+            if(rex.is32bits) {
+                INST_NAME("POPAD");
+                POP1_32(xRDI);
+                POP1_32(xRSI);
+                POP1_32(xRBP);
+                POP1_32(x1);
+                POP1_32(xRBX);
+                POP1_32(xRDX);
+                POP1_32(xRCX);
+                POP1_32(xRAX);
+            } else {
+                DEFAULT;
             }
             break;
 
         case 0x63:
-            INST_NAME("MOVSXD Gd, Ed");
-            nextop = F8;
-            GETGD;
-            if(rex.w) {
-                if(MODREG) {   // reg <= reg
-                    ADDIW(gd, xRAX+(nextop&7)+(rex.b<<3), 0);
-                } else {                    // mem <= reg
-                    SMREAD();
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LW(gd, ed, fixedaddress);
-                }
+            if(rex.is32bits) {
+                // this is ARPL opcode
+                DEFAULT;
             } else {
-                if(MODREG) {   // reg <= reg
-                    AND(gd, xRAX+(nextop&7)+(rex.b<<3), xMASK);
-                } else {                    // mem <= reg
-                    SMREAD();
-                    addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
-                    LWU(gd, ed, fixedaddress);
+                INST_NAME("MOVSXD Gd, Ed");
+                nextop = F8;
+                GETGD;
+                if(rex.w) {
+                    if(MODREG) {   // reg <= reg
+                        ADDIW(gd, xRAX+(nextop&7)+(rex.b<<3), 0);
+                    } else {                    // mem <= reg
+                        SMREAD();
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                        LW(gd, ed, fixedaddress);
+                    }
+                } else {
+                    if(MODREG) {   // reg <= reg
+                        AND(gd, xRAX+(nextop&7)+(rex.b<<3), xMASK);
+                    } else {                    // mem <= reg
+                        SMREAD();
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                        LWU(gd, ed, fixedaddress);
+                    }
                 }
             }
             break;
@@ -123,10 +181,10 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 MESSAGE(LOG_DUMP, "PUSH then RET, using indirect\n");
                 TABLE64(x3, addr-4);
                 LW(x1, x3, 0);
-                PUSH1(x1);
+                PUSH1z(x1);
             } else {
-                MOV64x(x3, i64);
-                PUSH1(x3);
+                MOV64z(x3, i64);
+                PUSH1z(x3);
             }
             break;
         case 0x69:
@@ -165,8 +223,8 @@ uintptr_t dynarec64_00_1(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x6A:
             INST_NAME("PUSH Ib");
             i64 = F8S;
-            MOV64x(x3, i64);
-            PUSH1(x3);
+            MOV64z(x3, i64);
+            PUSH1z(x3);
             break;
         case 0x6B:
             INST_NAME("IMUL Gd, Ed, Ib");
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 9f9c7c8e..3fe52cef 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -324,7 +324,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETGD;
             if(MODREG) {   // reg <= reg
                 MVxw(xRAX+(nextop&7)+(rex.b<<3), gd);
-            } else {                    // mem <= reg
+            } else {       // mem <= reg
                 addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, &lock, 1, 0);
                 SDxw(gd, ed, fixedaddress);
                 SMWRITELOCK(lock);
@@ -399,15 +399,13 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("LEA Gd, Ed");
             nextop=F8;
             GETGD;
-            if(MODREG) {   // reg <= reg? that's an invalid operation
+            if(MODREG) { // reg <= reg? that's an invalid operation
                 DEFAULT;
-            } else {                    // mem <= reg
-                addr = geted(dyn, addr, ninst, nextop, &ed, gd, x1, &fixedaddress, rex, NULL, 0, 0);
-                if(gd!=ed) {    // it's sometimes used as a 3 bytes NOP
-                    MV(gd, ed);
-                }
-                if(!rex.w) {
-                    ZEROUP(gd);   //truncate the higher 32bits as asked
+            } else {     // mem <= reg
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 0, 0);
+                MV(gd, ed);
+                if(!rex.w || rex.is32bits) {
+                    ZEROUP(gd); // truncate the higher 32bits as asked
                 }
             }
             break;
@@ -429,17 +427,17 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             INST_NAME("POP Ed");
             nextop = F8;
             if(MODREG) {
-                POP1(xRAX+(nextop&7)+(rex.b<<3));
+                POP1z(xRAX+(nextop&7)+(rex.b<<3));
             } else {
-                POP1(x2); // so this can handle POP [ESP] and maybe some variant too
+                POP1z(x2); // so this can handle POP [ESP] and maybe some variant too
                 addr = geted(dyn, addr, ninst, nextop, &ed, x3, x1, &fixedaddress, rex, &lock, 1, 0);
                 if(ed==xRSP) {
-                    SD(x2, ed, fixedaddress);
+                    SDz(x2, ed, fixedaddress);
                 } else {
                     // complicated to just allow a segfault that can be recovered correctly
-                    SUB(xRSP, xRSP, 8);
-                    SD(x2, ed, fixedaddress);
-                    ADD(xRSP, xRSP, 8);
+                    ADDIz(xRSP, xRSP, rex.is32bits?-4:-8);
+                    SDz(x2, ed, fixedaddress);
+                    ADDIz(xRSP, xRSP, rex.is32bits?4:8);
                 }
             }
             break;
@@ -486,14 +484,15 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             break;
         case 0x9C:
             INST_NAME("PUSHF");
+            NOTEST(x1);
             READFLAGS(X_ALL);
             FLAGS_ADJUST_TO11(x3, xFlags, x2);
-            PUSH1(x3);
+            PUSH1z(x3);
             break;
         case 0x9D:
             INST_NAME("POPF");
             SETFLAGS(X_ALL, SF_SET);
-            POP1(xFlags);
+            POP1z(xFlags);
             FLAGS_ADJUST_FROM11(xFlags, x2);
             MOV32w(x1, 0x3F7FD7);
             AND(xFlags, xFlags, x1);
@@ -511,26 +510,37 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         case 0x9F:
             INST_NAME("LAHF");
             READFLAGS(X_CF|X_PF|X_AF|X_ZF|X_SF);
-            ANDI(xRAX, xFlags, 0xFF);
-            SLLI(xRAX, xRAX, 8);
+            ANDI(x1, xFlags, 0xFF);     // x1 = low 8 bits of the flags
+            SLLI(x1, x1, 8);            // move them to bits 8..15 (AH)
+            MOV64x(x2, 0xffffffffffff00ffLL);
+            AND(xRAX, xRAX, x2);        // clear AH only, keep every other bit of RAX
+            OR(xRAX, xRAX, x1);
+            break;
+        case 0xA0:
+            INST_NAME("MOV AL,Ob");
+            if(rex.is32bits) u64 = F32; else u64 = F64;
+            MOV64z(x1, u64);
+            LBU(x2, x1, 0);             // load the byte in a scratch: only AL must change
+            ANDI(xRAX, xRAX, ~0xff);    // clear AL, keep bits 8..63 of RAX
+            OR(xRAX, xRAX, x2);
             break;
         case 0xA1:
             INST_NAME("MOV EAX,Od");
-            u64 = F64;
-            MOV64x(x1, u64);
+            if(rex.is32bits) u64 = F32; else u64 = F64;
+            MOV64z(x1, u64);
             LDxw(xRAX, x1, 0);
             break;
         case 0xA2:
             INST_NAME("MOV Ob,AL");
-            u64 = F64;
-            MOV64x(x1, u64);
+            if(rex.is32bits) u64 = F32; else u64 = F64;
+            MOV64z(x1, u64);
             SB(xRAX, x1, 0);
             SMWRITE();
             break;
         case 0xA3:
             INST_NAME("MOV Od,EAX");
-            u64 = F64;
-            MOV64x(x1, u64);
+            if(rex.is32bits) u64 = F32; else u64 = F64;
+            MOV64z(x1, u64);
             SDxw(xRAX, x1, 0);
             SMWRITE();
             break;
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index f0428791..bf14373c 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -196,7 +196,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             BARRIER(BARRIER_FLOAT);
             i32 = F16;
-            retn_to_epilog(dyn, ninst, i32);
+            retn_to_epilog(dyn, ninst, rex, i32);
             *need_epilog = 0;
             *ok = 0;
             break;
@@ -207,7 +207,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 READFLAGS(X_PEND);  // so instead, force the deferred flags, so it's not too slow, and flags are not lost
             }
             BARRIER(BARRIER_FLOAT);
-            ret_to_epilog(dyn, ninst);
+            ret_to_epilog(dyn, ninst, rex);
             *need_epilog = 0;
             *ok = 0;
             break;
@@ -279,8 +279,8 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
 
         case 0xC9:
             INST_NAME("LEAVE");
-            MV(xRSP, xRBP);
-            POP1(xRBP);
+            MVz(xRSP, xRBP);
+            POP1z(xRBP);
             break;
 
         case 0xCC:
@@ -627,7 +627,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 #endif
             }
             #if STEP < 2
-            if(isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn))
+            if(!rex.is32bits && isNativeCall(dyn, addr+i32, &dyn->insts[ninst].natcall, &dyn->insts[ninst].retn))
                 tmp = dyn->insts[ninst].pass2choice = 3;
             else
                 tmp = dyn->insts[ninst].pass2choice = 0;
@@ -704,12 +704,13 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         *need_epilog = 0;
                         *ok = 0;
                     }
-                    if(addr<0x100000000LL) {
-                        MOV64x(x2, addr);
+
+                    if(rex.is32bits) {
+                        MOV32w(x2, addr);
                     } else {
                         TABLE64(x2, addr);
                     }
-                    PUSH1(x2);
+                    PUSH1z(x2);
                     // TODO: Add support for CALLRET optim
                     /*if(box64_dynarec_callret) {
                         // Push actual return address
@@ -729,16 +730,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         *ok = 0;
                         *need_epilog = 0;
                     }
-                    if(addr+i32==0) {   // self modifying code maybe? so use indirect address fetching
-                        if(addr-4<0x100000000LL) {
-                            MOV64x(x4, addr-4);
-                        } else {
-                            TABLE64(x4, addr-4);
-                        }
-                        LD(x4, x4, 0);
-                        jump_to_next(dyn, 0, x4, ninst);
-                    } else
-                        jump_to_next(dyn, addr+i32, 0, ninst);
+                    jump_to_next(dyn, addr+i32, 0, ninst);
                     break;
             }
             break;
@@ -1075,7 +1067,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     } else {
                         SETFLAGS(X_ALL, SF_SET);    //Hack to put flag in "don't care" state
                     }
-                    GETEDx(0);
+                    GETEDz(0);
                     if(box64_dynarec_callret && box64_dynarec_bigblock>1) {
                         BARRIER(BARRIER_FULL);
                     } else {
@@ -1098,22 +1090,41 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         }
                         STPx_S7_preindex(x4, xRIP, xSP, -16);
                     }*/
-                    PUSH1(xRIP);
+                    PUSH1z(xRIP);
                     jump_to_next(dyn, 0, ed, ninst);
                     break;
                 case 4: // JMP Ed
                     INST_NAME("JMP Ed");
                     READFLAGS(X_PEND);
                     BARRIER(BARRIER_FLOAT);
-                    GETEDx(0);
+                    GETEDz(0);
                     jump_to_next(dyn, 0, ed, ninst);
                     *need_epilog = 0;
                     *ok = 0;
                     break;
+                case 5: // JMP FAR Ed
+                    if(MODREG) {
+                        DEFAULT;    // a far pointer has to come from memory
+                    } else {
+                        INST_NAME("JMP FAR Ed");
+                        READFLAGS(X_PEND);
+                        BARRIER(BARRIER_FLOAT);
+                        SMREAD();
+                        addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 0, 0);
+                        LDxw(x1, wback, 0);         // jump target: first field of the far pointer
+                        ed = x1;
+                        LHU(x3, wback, rex.w?8:4);  // 16-bit selector, stored right after the offset
+                        SH(x3, xEmu, offsetof(x64emu_t, segs[_CS]));  // store only the 16 bits that were loaded (NOTE(review): assumes 16-bit segs[] entries - confirm)
+                        SW(xZR, xEmu, offsetof(x64emu_t, segs_serial[_CS]));  // zero the serial kept for CS
+                        jump_to_epilog(dyn, 0, ed, ninst);
+                        *need_epilog = 0;
+                        *ok = 0;
+                    }
+                    break;
                 case 6: // Push Ed
                     INST_NAME("PUSH Ed");
-                    GETEDx(0);
-                    PUSH1(ed);
+                    GETEDz(0);
+                    PUSH1z(ed);
                     break;
 
                 default:
diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c
index 0fbbd8c5..bc3b2c96 100644
--- a/src/dynarec/rv64/dynarec_rv64_64.c
+++ b/src/dynarec/rv64/dynarec_rv64_64.c
@@ -32,6 +32,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     uint8_t gd, ed, eb1, eb2, gb1, gb2;
     uint8_t gback, wback, wb1, wb2, wb;
     int64_t i64, j64;
+    uint64_t u64;
     int v0, v1;
     int q0;
     int d0;
@@ -53,12 +54,8 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         rep = opcode-0xF1;
         opcode = F8;
     }
-    // REX prefix before the F0 are ignored
-    rex.rex = 0;
-    while(opcode>=0x40 && opcode<=0x4f) {
-        rex.rex = opcode;
-        opcode = F8;
-    }
+
+    GETREX();
 
     switch(opcode) {
         case 0x03:
@@ -396,6 +393,34 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 LDxw(gd, x4, fixedaddress);
             }
             break;
+
+        case 0xA1:
+            INST_NAME("MOV EAX,FS:Od");
+            grab_segdata(dyn, addr, ninst, x4, seg);
+            if(rex.is32bits)
+                u64 = F32;
+            else
+                u64 = F64;
+            // TODO: could be optimized.
+            MOV64z(x1, u64);
+            ADD(x1, x1, x4);
+            LDxw(xRAX, x1, 0);
+            break;
+
+        case 0xA3:
+            INST_NAME("MOV FS:Od,EAX");
+            grab_segdata(dyn, addr, ninst, x4, seg);
+            if(rex.is32bits)
+                u64 = F32;
+            else
+                u64 = F64;
+            // TODO: could be optimized.
+            MOV64z(x1, u64);
+            ADD(x1, x1, x4);
+            SDxw(xRAX, x1, 0);
+            SMWRITE2();
+            break;
+
         case 0xC6:
             INST_NAME("MOV Seg:Eb, Ib");
             grab_segdata(dyn, addr, ninst, x4, seg);
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index 2a7280a2..49a7ef65 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -49,14 +49,10 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         rep = opcode-0xF1;
         opcode = F8;
     }
-    // REX prefix before the 66 are ignored
-    rex.rex = 0;
-    while(opcode>=0x40 && opcode<=0x4f) {
-        rex.rex = opcode;
-        opcode = F8;
-    }
 
-    if(rex.w && opcode!=0x0f)   // rex.w cancels "66", but not for 66 0f type of prefix
+    GETREX();
+
+    if(rex.w && !(opcode==0x0f || opcode==0xf0 || opcode==0x64 || opcode==0x65))   // rex.w cancels "66", but not for 66 0f type of prefix
         return dynarec64_00(dyn, addr-1, ip, ninst, rex, rep, ok, need_epilog); // addr-1, to "put back" opcode
 
     switch(opcode) {
@@ -256,6 +252,42 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 emit_cmp16_0(dyn, ninst, x1, x3, x4);
             }
             break;
+        case 0x40:
+        case 0x41:
+        case 0x42:
+        case 0x43:
+        case 0x44:
+        case 0x45:
+        case 0x46:
+        case 0x47:
+            INST_NAME("INC Reg16 (32bits)");
+            SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
+            gd = xRAX + (opcode&7);
+            ZEXTH(x1, gd);
+            emit_inc16(dyn, ninst, x1, x2, x3, x4);
+            LUI(x3, 0xffff0);
+            AND(gd, gd, x3);
+            OR(gd, gd, x1);
+            ZEROUP(gd);
+            break;
+        case 0x48:
+        case 0x49:
+        case 0x4A:
+        case 0x4B:
+        case 0x4C:
+        case 0x4D:
+        case 0x4E:
+        case 0x4F:
+            INST_NAME("DEC Reg16 (32bits)");
+            SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
+            gd = xRAX + (opcode&7);
+            ZEXTH(x1, gd);
+            emit_dec16(dyn, ninst, x1, x2, x3, x4, x5);
+            LUI(x3, 0xffff0);
+            AND(gd, gd, x3);
+            OR(gd, gd, x1);
+            ZEROUP(gd);
+            break;
         case 0x64:
             addr = dynarec64_6664(dyn, addr, ip, ninst, rex, _FS, ok, need_epilog);
             break;
@@ -610,7 +642,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 }
             }
             break;
-            
+
         case 0xC1:
             nextop = F8;
             switch((nextop>>3)&7) {
@@ -706,7 +738,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     break;
             }
             break;
-            
+
         case 0xC7:
             INST_NAME("MOV Ew, Iw");
             nextop = F8;
diff --git a/src/dynarec/rv64/dynarec_rv64_6664.c b/src/dynarec/rv64/dynarec_rv64_6664.c
index fbf8b15d..a139e3ae 100644
--- a/src/dynarec/rv64/dynarec_rv64_6664.c
+++ b/src/dynarec/rv64/dynarec_rv64_6664.c
@@ -34,12 +34,7 @@ uintptr_t dynarec64_6664(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
     int unscaled;
     MAYUSE(j64);
 
-    // REX prefix before the 66 are ignored
-    rex.rex = 0;
-    while(opcode>=0x40 && opcode<=0x4f) {
-        rex.rex = opcode;
-        opcode = F8;
-    }
+    GETREX();
 
     switch(opcode) {
         case 0x8B:
diff --git a/src/dynarec/rv64/dynarec_rv64_66f0.c b/src/dynarec/rv64/dynarec_rv64_66f0.c
index ee2e0b66..863e535d 100644
--- a/src/dynarec/rv64/dynarec_rv64_66f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_66f0.c
@@ -44,12 +44,8 @@ uintptr_t dynarec64_66F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         rep = opcode-0xF1;
         opcode = F8;
     }
-    // REX prefix before the F0/66 are ignored
-    rex.rex = 0;
-    while(opcode>=0x40 && opcode<=0x4f) {
-        rex.rex = opcode;
-        opcode = F8;
-    }
+
+    GETREX();
 
     switch(opcode) {
         case 0x81:
diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c
index 89ee4fe3..cb7702a8 100644
--- a/src/dynarec/rv64/dynarec_rv64_67.c
+++ b/src/dynarec/rv64/dynarec_rv64_67.c
@@ -46,12 +46,14 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     MAYUSE(lock);
     MAYUSE(cacheupd);
 
-    // REX prefix before the 67 are ignored
-    rex.rex = 0;
-    while(opcode>=0x40 && opcode<=0x4f) {
-        rex.rex = opcode;
-        opcode = F8;
+    if(rex.is32bits) {
+        // should do a different file
+        DEFAULT;
+        return addr;
     }
+
+    GETREX();
+
     rep = 0;
     while((opcode==0xF2) || (opcode==0xF3)) {
         rep = opcode-0xF1;
diff --git a/src/dynarec/rv64/dynarec_rv64_f0.c b/src/dynarec/rv64/dynarec_rv64_f0.c
index 65a144da..348f2905 100644
--- a/src/dynarec/rv64/dynarec_rv64_f0.c
+++ b/src/dynarec/rv64/dynarec_rv64_f0.c
@@ -46,12 +46,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         rep = opcode-0xF1;
         opcode = F8;
     }
-    // REX prefix before the F0 are ignored
-    rex.rex = 0;
-    while(opcode>=0x40 && opcode<=0x4f) {
-        rex.rex = opcode;
-        opcode = F8;
-    }
+
+    GETREX();
 
     // TODO: Take care of unligned memory access for all the LOCK ones.
     // https://github.com/ptitSeb/box64/pull/604
@@ -115,14 +111,14 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 if(rex.rex) {
                                     wback = xRAX+(nextop&7)+(rex.b<<3);
                                     wb2 = 0;
-                                } else { 
+                                } else {
                                     wback = (nextop&7);
                                     wb2 = (wback>>2)*8;
                                     wback = xRAX+(wback&3);
                                 }
                                 if (wb2) {
-                                    MV(x2, wback); 
-                                    SRLI(x2, x2, wb2); 
+                                    MV(x2, wback);
+                                    SRLI(x2, x2, wb2);
                                     ANDI(x2, x2, 0xff);
                                 } else {
                                     ANDI(x2, wback, 0xff);
@@ -134,8 +130,8 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 }
                                 BNE_MARK2(x6, x2);
                                 if (wb2) {
-                                    MV(wback, x2); 
-                                    SRLI(wback, wback, wb2); 
+                                    MV(wback, x2);
+                                    SRLI(wback, wback, wb2);
                                     ANDI(wback, wback, 0xff);
                                 } else {
                                     ANDI(wback, x2, 0xff);
@@ -148,7 +144,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                                 B_NEXT_nocond;
                             } else {
                                 // this one is tricky, and did some repetitive work.
-                                // mostly because we only got 6 scratch registers, 
+                                // mostly because we only got 6 scratch registers,
                                 // and has so much to do.
                                 if(rex.rex) {
                                     gb1 = xRAX+((nextop&0x38)>>3)+(rex.r<<3);
@@ -541,7 +537,7 @@ uintptr_t dynarec64_F0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                             emit_sub32c(dyn, ninst, rex, x1, i64, x3, x4, x5, x6);
                     }
                     break;
-                default: 
+                default:
                     DEFAULT;
             }
             SMDMB();
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c
index b31b79eb..541ac45f 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.c
+++ b/src/dynarec/rv64/dynarec_rv64_functions.c
@@ -128,7 +128,7 @@ int extcache_get_st_f(dynarec_rv64_t* dyn, int ninst, int a)
          && dyn->insts[ninst].e.extcache[i].n==a)
             return i;
     return -1;
-} 
+}
 int extcache_get_st_f_noback(dynarec_rv64_t* dyn, int ninst, int a)
 {
     for(int i=0; i<24; ++i)
@@ -136,7 +136,7 @@ int extcache_get_st_f_noback(dynarec_rv64_t* dyn, int ninst, int a)
          && dyn->insts[ninst].e.extcache[i].n==a)
             return i;
     return -1;
-} 
+}
 int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a)
 {
     for(int i=0; i<24; ++i)
@@ -144,7 +144,7 @@ int extcache_get_current_st_f(dynarec_rv64_t* dyn, int a)
          && dyn->e.extcache[i].n==a)
             return i;
     return -1;
-} 
+}
 
 static void extcache_promote_double_forward(dynarec_rv64_t* dyn, int ninst, int maxinst, int a);
 static void extcache_promote_double_internal(dynarec_rv64_t* dyn, int ninst, int maxinst, int a);
@@ -153,7 +153,7 @@ static void extcache_promote_double_combined(dynarec_rv64_t* dyn, int ninst, int
     if(a == dyn->insts[ninst].e.combined1 || a == dyn->insts[ninst].e.combined2) {
         if(a == dyn->insts[ninst].e.combined1) {
             a = dyn->insts[ninst].e.combined2;
-        } else 
+        } else
             a = dyn->insts[ninst].e.combined1;
         int i = extcache_get_st_f_noback(dyn, ninst, a);
         //if(box64_dynarec_dump) dynarec_log(LOG_NONE, "extcache_promote_double_combined, ninst=%d combined%c %d i=%d (stack:%d/%d)\n", ninst, (a == dyn->insts[ninst].e.combined2)?'2':'1', a ,i, dyn->insts[ninst].e.stack_push, -dyn->insts[ninst].e.stack_pop);
@@ -326,7 +326,7 @@ void extcacheUnwind(extcache_t* cache)
 {
     if(cache->swapped) {
         // unswap
-        int a = -1; 
+        int a = -1;
         int b = -1;
         for(int j=0; j<24 && ((a==-1) || (b==-1)); ++j)
             if((cache->extcache[j].t == EXT_CACHE_ST_D || cache->extcache[j].t == EXT_CACHE_ST_F)) {
@@ -491,7 +491,7 @@ const char* getCacheName(int t, int n)
     return buff;
 }
 
-void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name)
+void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex)
 {
     static const char* fnames[] = {
         "ft0"," ft1", "ft2", "ft3", "ft4", "ft5", "ft6", "ft7",
@@ -501,12 +501,12 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name)
         "ft8", "ft9", "ft10", "ft11"
     };
     if(box64_dynarec_dump) {
-        printf_x64_instruction(my_context->dec, &dyn->insts[ninst].x64, name);
+        printf_x64_instruction(rex.is32bits?my_context->dec32:my_context->dec, &dyn->insts[ninst].x64, name);
         dynarec_log(LOG_NONE, "%s%p: %d emitted opcodes, inst=%d, barrier=%d state=%d/%d(%d), %s=%X/%X, use=%X, need=%X/%X, sm=%d/%d",
             (box64_dynarec_dump>1)?"\e[32m":"",
             (void*)(dyn->native_start+dyn->insts[ninst].address),
             dyn->insts[ninst].size/4,
-            ninst,         
+            ninst,
             dyn->insts[ninst].x64.barrier,
             dyn->insts[ninst].x64.state_flags,
             dyn->f.pending,
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h
index fc53dcd7..451336bd 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.h
+++ b/src/dynarec/rv64/dynarec_rv64_functions.h
@@ -45,7 +45,7 @@ void extcacheUnwind(extcache_t* cache);
 
 const char* getCacheName(int t, int n);
 
-void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name);
+void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t rex);
 void print_opcode(dynarec_native_t* dyn, int ninst, uint32_t opcode);
 void print_newinst(dynarec_native_t* dyn, int ninst);
 
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 52cb5ce6..3342880e 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -26,11 +26,16 @@
 #include "dynarec_rv64_functions.h"
 #include "dynarec_rv64_helper.h"
 
+static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, int *l, int i12);
+
 /* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */
 uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int *l, int i12, int delta)
 {
     MAYUSE(dyn); MAYUSE(ninst); MAYUSE(delta);
 
+    if(rex.is32bits)
+        return geted_32(dyn, addr, ninst, nextop, ed, hint, scratch, fixaddress, l, i12);
+
     int lock = l?((l==LOCK_LOCK)?1:2):0;
     if(lock==2)
         *l = 0;
@@ -206,6 +211,150 @@ uintptr_t geted(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop,
     return addr;
 }
 
+/* 32-bit-mode variant of geted(); geted() forwards here when rex.is32bits is set.
+ * Decodes the ModRM byte nextop (plus the SIB byte and displacement it implies) and
+ * emits the code that computes the effective address. Registers come from the 3-bit
+ * ModRM/SIB fields only (no REX extension); sums use ADDW/ADDIW/SHxADDUW.
+ *  ed         : out, register holding the address (or its base when *fixaddress!=0)
+ *  hint       : register to build the address in; x2 is used when hint is 0
+ *  scratch    : temporary register for intermediate sums
+ *  fixaddress : out, displacement left for the caller to add to *ed, else 0
+ *  l          : NULL, LOCK_LOCK (absolute addresses are recorded with addLockAddress)
+ *               or a flag set to 1 when isLockAddress() knows the absolute address
+ *  i12        : non-zero allows a displacement that fits a signed 12-bit immediate to
+ *               be handed back in *fixaddress; a value >1 is kept as headroom below 2047
+ * Returns addr. NOTE(review): presumably advanced by the F8/F32/F8S/F32S fetches - confirm.
+ */
+static uintptr_t geted_32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, int *l, int i12)
+{
+    MAYUSE(dyn); MAYUSE(ninst);
+
+    int lock = l?((l==LOCK_LOCK)?1:2):0;    // 0: no lock info, 1: record address, 2: query address
+    if(lock==2)
+        *l = 0;
+    uint8_t ret = x2;
+    *fixaddress = 0;
+    if(hint>0) ret = hint;
+    int maxval = 2047;  // largest displacement that may be left in *fixaddress
+    if(i12>1)
+        maxval -= i12;
+    MAYUSE(scratch);
+    if(!(nextop&0xC0)) {    // mod==0: no displacement byte, except for the disp32 forms
+        if((nextop&7)==4) { // a SIB byte follows
+            uint8_t sib = F8;
+            int sib_reg = (sib>>3)&0x7;
+            int sib_reg2 = sib&0x7;
+            if(sib_reg2==5) {   // base==5 with mod==0: disp32 instead of a base register
+                int64_t tmp = F32S;
+                if (sib_reg!=4) {   // index register present
+                    if(tmp && ((tmp<-2048) || (tmp>maxval) || !i12)) {
+                        MOV32w(scratch, tmp);
+                        if((sib>>6)) {
+                            if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, sib>>6); ADDW(ret, ret, scratch);}
+                        } else
+                            ADDW(ret, xRAX+sib_reg, scratch);
+                    } else {
+                        if(sib>>6)
+                            SLLI(ret, xRAX+sib_reg, (sib>>6));
+                        else
+                            ret = xRAX+sib_reg;
+                        *fixaddress = tmp;  // displacement left to the caller
+                    }
+                } else {    // no index: the address is the disp32 itself
+                    switch(lock) {
+                        case 1: addLockAddress((int32_t)tmp); break;
+                        case 2: if(isLockAddress((int32_t)tmp)) *l=1; break;
+                    }
+                    MOV32w(ret, tmp);
+                }
+            } else {
+                if (sib_reg!=4) {
+                    if((sib>>6)) {
+                        if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else { SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, xRAX+sib_reg2);}
+                    } else
+                        ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg);
+                } else {
+                    ret = xRAX+sib_reg2;    // base register only, used directly
+                }
+            }
+        } else if((nextop&7)==5) {  // disp32 only, loaded as an absolute address
+            uint32_t tmp = F32;
+            MOV32w(ret, tmp);
+            switch(lock) {
+                case 1: addLockAddress(tmp); break;
+                case 2: if(isLockAddress(tmp)) *l=1; break;
+            }
+        } else {
+            ret = xRAX+(nextop&7);
+            if(ret==hint) {
+                AND(hint, ret, xMASK);    //to clear upper part
+            }
+        }
+    } else {    // mod==1 or 2: base (or SIB) plus disp8 / disp32
+        int64_t i32;
+        uint8_t sib = 0;
+        int sib_reg = 0;
+        if((nextop&7)==4) {
+            sib = F8;
+            sib_reg = (sib>>3)&7;
+        }
+        int sib_reg2 = sib&0x07;
+        if(nextop&0x80)
+            i32 = F32S;
+        else
+            i32 = F8S;
+        if(i32==0 || ((i32>=-2048) && (i32<=maxval) && i12)) {  // maxval, not 2047: same headroom rule as the SIB+disp32 case
+            *fixaddress = i32;
+            if((nextop&7)==4) {
+                if (sib_reg!=4) {
+                    if(sib>>6) {
+                        if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, xRAX+sib_reg2);}
+                    } else
+                        ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg);
+                } else {
+                    ret = xRAX+sib_reg2;
+                }
+            } else {
+                ret = xRAX+(nextop&0x07);
+            }
+        } else {
+            if(i32>=-2048 && i32<=2047) {   // displacement still fits the ADDIW immediate
+                if((nextop&7)==4) {
+                    if (sib_reg!=4) {
+                        if(sib>>6) {
+                            if(rv64_zba) SHxADDUW(scratch, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(scratch, xRAX+sib_reg, sib>>6); ADDW(scratch, scratch, xRAX+sib_reg2);}
+                        } else
+                            ADDW(scratch, xRAX+sib_reg2, xRAX+sib_reg);
+                    } else {
+                        scratch = xRAX+sib_reg2;
+                    }
+                } else
+                    scratch = xRAX+(nextop&0x07);
+                ADDIW(ret, scratch, i32);
+            } else {    // large displacement: materialize it in scratch first
+                MOV32w(scratch, i32);
+                if((nextop&7)==4) {
+                    if (sib_reg!=4) {
+                        ADDW(scratch, scratch, xRAX+sib_reg2);
+                        if(sib>>6) {
+                            if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, scratch);}
+                        } else
+                            ADDW(ret, scratch, xRAX+sib_reg);
+                    } else {
+                        PASS3(int tmp = xRAX+sib_reg2);
+                        ADDW(ret, tmp, scratch);
+                    }
+                } else {
+                    PASS3(int tmp = xRAX+(nextop&0x07));
+                    ADDW(ret, tmp, scratch);
+                }
+            }
+        }
+    }
+    *ed = ret;
+    return addr;
+}
+
 /* setup r2 to address pointed by ED, also fixaddress is an optionnal delta in the range [-absmax, +absmax], with delta&mask==0 to be added to ed for LDR/STR */
 uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop, uint8_t* ed, uint8_t hint, uint8_t scratch, int64_t* fixaddress, rex_t rex, int *l, int i12, int delta)
 {
@@ -229,12 +364,12 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
             if((sib&0x7)==5) {
                 int64_t tmp = F32S;
                 if (sib_reg!=4) {
-                    if(tmp && ((tmp<-2048) && (tmp>maxval) || !i12)) {
+                    if(tmp && ((tmp<-2048) || (tmp>maxval) || !i12)) {
                         MOV64x(scratch, tmp);
                         if((sib>>6)) {
-                            if(rv64_zba) SHxADD(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, sib>>6); ADD(ret, ret, scratch);}
+                            if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, sib>>6); ADDW(ret, ret, scratch);}
                         } else
-                            ADD(ret, xRAX+sib_reg, scratch);
+                            ADDW(ret, xRAX+sib_reg, scratch);
                     } else {
                         if(sib>>6)
                             SLLI(ret, xRAX+sib_reg, (sib>>6));
@@ -252,9 +387,9 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
             } else {
                 if (sib_reg!=4) {
                     if((sib>>6)) {
-                        if(rv64_zba) SHxADD(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else { SLLI(ret, xRAX+sib_reg, (sib>>6)); ADD(ret, ret, xRAX+sib_reg2);}
+                        if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else { SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, xRAX+sib_reg2);}
                     } else
-                        ADD(ret, xRAX+sib_reg2, xRAX+sib_reg);
+                        ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg);
                 } else {
                     ret = xRAX+sib_reg2;
                 }
@@ -263,7 +398,7 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
             uint32_t tmp = F32;
             MOV32w(ret, tmp);
             GETIP(addr+delta);
-            ADD(ret, ret, xRIP);
+            ADDW(ret, ret, xRIP);
             switch(lock) {
                 case 1: addLockAddress(addr+delta+tmp); break;
                 case 2: if(isLockAddress(addr+delta+tmp)) *l=1; break;
@@ -292,9 +427,9 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
             if((nextop&7)==4) {
                 if (sib_reg!=4) {
                     if(sib>>6) {
-                    if(rv64_zba) SHxADD(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADD(ret, ret, xRAX+sib_reg2);}
+                    if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, xRAX+sib_reg2);}
                     } else
-                        ADD(ret, xRAX+sib_reg2, xRAX+sib_reg);
+                        ADDW(ret, xRAX+sib_reg2, xRAX+sib_reg);
                 } else {
                     ret = xRAX+sib_reg2;
                 }
@@ -306,31 +441,31 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
                 if((nextop&7)==4) {
                     if (sib_reg!=4) {
                         if(sib>>6) {
-                            if(rv64_zba) SHxADD(scratch, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(scratch, xRAX+sib_reg, sib>>6); ADD(scratch, scratch, xRAX+sib_reg2);}
+                            if(rv64_zba) SHxADDUW(scratch, xRAX+sib_reg, (sib>>6), xRAX+sib_reg2); else {SLLI(scratch, xRAX+sib_reg, sib>>6); ADDW(scratch, scratch, xRAX+sib_reg2);}
                         } else
-                            ADD(scratch, xRAX+sib_reg2, xRAX+sib_reg);
+                            ADDW(scratch, xRAX+sib_reg2, xRAX+sib_reg);
                     } else {
                         scratch = xRAX+sib_reg2;
                     }
                 } else
                     scratch = xRAX+(nextop&0x07)+(rex.b<<3);
-                ADDI(ret, scratch, i64);
+                ADDIW(ret, scratch, i64);
             } else {
                 MOV32w(scratch, i64);
                 if((nextop&7)==4) {
                     if (sib_reg!=4) {
-                        ADD(scratch, scratch, xRAX+sib_reg2);
+                        ADDW(scratch, scratch, xRAX+sib_reg2);
                         if(sib>>6) {
-                            if(rv64_zba) SHxADD(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADD(ret, ret, scratch);}
+                            if(rv64_zba) SHxADDUW(ret, xRAX+sib_reg, (sib>>6), scratch); else {SLLI(ret, xRAX+sib_reg, (sib>>6)); ADDW(ret, ret, scratch);}
                         } else
-                            ADD(ret, scratch, xRAX+sib_reg);
+                            ADDW(ret, scratch, xRAX+sib_reg);
                     } else {
                         PASS3(int tmp = xRAX+sib_reg2);
-                        ADD(ret, tmp, scratch);
+                        ADDW(ret, tmp, scratch);
                     }
                 } else {
                     PASS3(int tmp = xRAX+(nextop&0x07)+(rex.b<<3));
-                    ADD(ret, tmp, scratch);
+                    ADDW(ret, tmp, scratch);
                 }
             }
         }
@@ -428,12 +563,12 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst)
     JALR(x2); // save LR...
 }
 
-void ret_to_epilog(dynarec_rv64_t* dyn, int ninst)
+void ret_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex)
 {
     MAYUSE(dyn); MAYUSE(ninst);
     MESSAGE(LOG_DUMP, "Ret to epilog\n");
-    POP1(xRIP);
-    MV(x1, xRIP);
+    POP1z(xRIP);
+    MVz(x1, xRIP);
     SMEND();
     /*if(box64_dynarec_callret) {
         // pop the actual return address from RV64 stack
@@ -476,18 +611,18 @@ void ret_to_epilog(dynarec_rv64_t* dyn, int ninst)
     CLEARIP();
 }
 
-void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, int n)
+void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex, int n)
 {
     MAYUSE(dyn); MAYUSE(ninst);
     MESSAGE(LOG_DUMP, "Retn to epilog\n");
-    POP1(xRIP);
+    POP1z(xRIP);
     if(n>0x7ff) {
         MOV64x(w1, n);
-        ADD(xRSP, xRSP, x1);
+        ADDz(xRSP, xRSP, x1);
     } else {
-        ADDI(xRSP, xRSP, n);
+        ADDIz(xRSP, xRSP, n);
     }
-    MV(x1, xRIP);
+    MVz(x1, xRIP);
     SMEND();
     /*if(box64_dynarec_callret) {
         // pop the actual return address from RV64 stack
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 6ce62914..0b1023b3 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -99,6 +99,15 @@
                     LD(x1, wback, fixedaddress);        \
                     ed = x1;                            \
                 }
+#define GETEDz(D) if(MODREG) {                          \
+                    ed = xRAX+(nextop&7)+(rex.b<<3);    \
+                    wback = 0;                          \
+                } else {                                \
+                    SMREAD()                            \
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \
+                    LDz(x1, wback, fixedaddress);       \
+                    ed = x1;                            \
+                }
 // GETED32 can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
 #define GETED32(D)  if(MODREG) {                        \
                     ed = xRAX+(nextop&7)+(rex.b<<3);    \
@@ -987,8 +996,8 @@ uintptr_t geted32(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop
 void jump_to_epilog(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst);
 void jump_to_epilog_fast(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst);
 void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst);
-void ret_to_epilog(dynarec_rv64_t* dyn, int ninst);
-void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, int n);
+void ret_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex);
+void retn_to_epilog(dynarec_rv64_t* dyn, int ninst, rex_t rex, int n);
 void iret_to_epilog(dynarec_rv64_t* dyn, int ninst, int is64bits);
 void call_c(dynarec_rv64_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
 void call_n(dynarec_rv64_t* dyn, int ninst, void* fnc, int w);
@@ -1328,4 +1337,12 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
         SW(s2, xEmu, offsetof(x64emu_t, test.test));        \
     }
 
+#define GETREX()                                \
+    rex.rex = 0;                                \
+    if(!rex.is32bits)                           \
+        while(opcode>=0x40 && opcode<=0x4f) {   \
+            rex.rex = opcode;                   \
+            opcode = F8;                        \
+        }
+
 #endif //__DYNAREC_RV64_HELPER_H__
diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h
index dafef0c5..459c4e13 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass3.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass3.h
@@ -1,4 +1,4 @@
-#define INIT    
+#define INIT
 #define FINI        \
     if(ninst)       \
         addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst].x64.size, dyn->insts[ninst].size/4); \
@@ -16,8 +16,8 @@
     if(box64_dynarec_dump) print_newinst(dyn, ninst);   \
     if(ninst)                                           \
         addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst-1].x64.size, dyn->insts[ninst-1].size/4);
-#define INST_EPILOG     
-#define INST_NAME(name) inst_name_pass3(dyn, ninst, name)
+#define INST_EPILOG
+#define INST_NAME(name) inst_name_pass3(dyn, ninst, name, rex)
 
 #define TABLE64(A, V)   {int val64offset = Table64(dyn, (V), 3); MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); AUIPC(A, SPLIT20(val64offset)); LD(A, A, SPLIT12(val64offset));}
 #define FTABLE64(A, V)  {mmx87_regs_t v = {.d = V}; int val64offset = Table64(dyn, v.q, 3); MESSAGE(LOG_DUMP, "  FTable64: %g\n", v.d); AUIPC(x1, SPLIT20(val64offset)); FLD(A, x1, SPLIT12(val64offset));}
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 6b01c342..e9fa2f6d 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -113,6 +113,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define MOV64x(A, B)    rv64_move64(dyn, ninst, A, B)
 #define MOV32w(A, B)    rv64_move32(dyn, ninst, A, B, 1)
 #define MOV64xw(A, B)   if(rex.w) {MOV64x(A, B);} else {MOV32w(A, B);}
+#define MOV64z(A, B)    if(rex.is32bits) {MOV32w(A, B);} else {MOV64x(A, B);}
 
 // ZERO the upper part
 #define ZEROUP(r)       AND(r, r, xMASK)
@@ -175,12 +176,16 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define ADDW(rd, rs1, rs2)          EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, 0b0111011))
 // rd = rs1 + rs2
 #define ADDxw(rd, rs1, rs2)         EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011))
+// rd = rs1 + rs2
+#define ADDz(rd, rs1, rs2)          EMIT(R_type(0b0000000, rs2, rs1, 0b000, rd, rex.is32bits?0b0111011:0b0110011))
 // rd = rs1 - rs2
 #define SUB(rd, rs1, rs2)           EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0110011))
 // rd = rs1 - rs2
 #define SUBW(rd, rs1, rs2)          EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, 0b0111011))
 // rd = rs1 - rs2
 #define SUBxw(rd, rs1, rs2)         EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, rex.w?0b0110011:0b0111011))
+// rd = rs1 - rs2
+#define SUBz(rd, rs1, rs2)          EMIT(R_type(0b0100000, rs2, rs1, 0b000, rd, rex.is32bits?0b0111011:0b0110011))
 // rd = rs1<<rs2
 #define SLL(rd, rs1, rs2)           EMIT(R_type(0b0000000, rs2, rs1, 0b001, rd, 0b0110011))
 // rd = (rs1<rs2)?1:0
@@ -203,7 +208,9 @@ f28–31  ft8–11  FP temporaries                  Caller
 // rd = rs1 (pseudo instruction)
 #define MV(rd, rs1)                 ADDI(rd, rs1, 0)
 // rd = rs1 (pseudo instruction)
-#define MVxw(rd, rs1)               if(rex.w) {MV(rd, rs1); } else {AND(rd, rs1, xMASK);}
+#define MVxw(rd, rs1)               if(rex.w) {MV(rd, rs1);} else {AND(rd, rs1, xMASK);}
+// rd = rs1 (pseudo instruction)
+#define MVz(rd, rs1)               if(rex.is32bits) {AND(rd, rs1, xMASK);} else {MV(rd, rs1);}
 // rd = !rs1
 #define NOT(rd, rs1)                XORI(rd, rs1, -1)
 // rd = -rs1
@@ -255,9 +262,12 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define SW(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b010, 0b0100011))
 
 #define PUSH1(reg)                  do {SD(reg, xRSP, -8); SUBI(xRSP, xRSP, 8);} while(0)
-#define POP1(reg)                   do {LD(reg, xRSP, 0); ADDI(xRSP, xRSP, 8);} while(0)
-#define PUSH1_32(reg)               do {SW(reg, xRSP, -4); SUBI(xRSP, xRSP, 4);} while(0)
-#define POP1_32(reg)                do {LWU(reg, xRSP, 0); ADDI(xRSP, xRSP, 4);} while(0)
+#define POP1(reg)                   do {LD(reg, xRSP, 0); if (reg!=xRSP) ADDI(xRSP, xRSP, 8);} while(0)
+#define PUSH1_32(reg)               do {SW(reg, xRSP, -4); SUBIW(xRSP, xRSP, 4);} while(0)
+#define POP1_32(reg)                do {LWU(reg, xRSP, 0); if (reg!=xRSP) ADDIW(xRSP, xRSP, 4);} while(0)
+
+#define POP1z(reg)                  if(rex.is32bits) {POP1_32(reg);} else {POP1(reg);}
+#define PUSH1z(reg)                 if(rex.is32bits) {PUSH1_32(reg);} else {PUSH1(reg);}
 
 #define FENCE_gen(pred, succ)       (((pred)<<24) | ((succ)<<20) | 0b0001111)
 #define FENCE()                     EMIT(FENCE_gen(3, 3))
@@ -274,10 +284,14 @@ f28–31  ft8–11  FP temporaries                  Caller
 #define LD(rd, rs1, imm12)          EMIT(I_type(imm12, rs1, 0b011, rd, 0b0000011))
 // rd = [rs1 + imm12]
 #define LDxw(rd, rs1, imm12)        EMIT(I_type(imm12, rs1, 0b011<<(1-rex.w), rd, 0b0000011))
+// rd = [rs1 + imm12]
+#define LDz(rd, rs1, imm12)         EMIT(I_type(imm12, rs1, 0b011<<rex.is32bits, rd, 0b0000011))
 // [rs1 + imm12] = rs2
 #define SD(rs2, rs1, imm12)         EMIT(S_type(imm12, rs2, rs1, 0b011, 0b0100011))
 // [rs1 + imm12] = rs2
 #define SDxw(rs2, rs1, imm12)       EMIT(S_type(imm12, rs2, rs1, 0b010+rex.w, 0b0100011))
+// [rs1 + imm12] = rs2
+#define SDz(rs2, rs1, imm12)        EMIT(S_type(imm12, rs2, rs1, 0b010+(1-rex.is32bits), 0b0100011))
 
 // Shift Left Immediate
 #define SLLI(rd, rs1, imm6)         EMIT(I_type(imm6, rs1, 0b001, rd, 0b0010011))
@@ -288,8 +302,12 @@ f28–31  ft8–11  FP temporaries                  Caller
 
 // rd = rs1 + imm12
 #define ADDIW(rd, rs1, imm12)       EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, 0b0011011))
+// rd = rs1 - imm12
+#define SUBIW(rd, rs1, imm12)       EMIT(I_type((-imm12)&0b111111111111, rs1, 0b000, rd, 0b0011011))
 // rd = rs1 + imm12
 #define ADDIxw(rd, rs1, imm12)      EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, rex.w?0b0010011:0b0011011))
+// rd = rs1 + imm12
+#define ADDIz(rd, rs1, imm12)       EMIT(I_type((imm12)&0b111111111111, rs1, 0b000, rd, rex.is32bits?0b0011011:0b0010011))
 
 #define SEXT_W(rd, rs1)             ADDIW(rd, rs1, 0)