author    ptitSeb <sebastien.chev@gmail.com>    2023-03-16 14:26:35 +0000
committer ptitSeb <sebastien.chev@gmail.com>    2023-03-16 14:26:35 +0000
commit    7cb9a5064a586c2599bded030e229d376c97e6cf (patch)
tree      a18f11b2a21ebe023bd4ce199b4b6164e29151a5 /src
parent    4b6ec02517635f7ff0a074c5b4092b589b9b04bf (diff)
download  box64-7cb9a5064a586c2599bded030e229d376c97e6cf.tar.gz
          box64-7cb9a5064a586c2599bded030e229d376c97e6cf.zip
[RV64_DYNAREC] Added 70-7F Jcc conditional jump opcodes, plus a ton of fixes and a few optims
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_00.c            58
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_emit_logic.c     4
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_emit_math.c     32
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_emit_shift.c    18
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_emit_tests.c    33
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_functions.c     49
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_functions.h      3
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.c        67
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_helper.h       119
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_pass2.h          2
-rw-r--r--  src/dynarec/rv64/dynarec_rv64_pass3.h         30
-rw-r--r--  src/dynarec/rv64/rv64_emitter.h                2
-rw-r--r--  src/dynarec/rv64/rv64_epilog.S                 2
13 files changed, 336 insertions, 83 deletions
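
Note (editorial sketch, not part of the patch): the GO/GOCOND pair added in dynarec_rv64_00.c and dynarec_rv64_helper.h below maps each x86 Jcc opcode in 0x70-0x7F onto a flag test plus a native conditional branch, taken either inside the block or out through the epilog. For reference, the condition semantics being implemented look like this in plain C; the function name and the flag macros are illustrative, using standard x86 EFLAGS bit positions rather than box64's own xFlags definitions.

    #include <stdint.h>

    /* Standard x86 EFLAGS bit positions (illustrative helper, not box64 code). */
    #define CF (1u << 0)
    #define PF (1u << 2)
    #define ZF (1u << 6)
    #define SF (1u << 7)
    #define OF (1u << 11)

    /* Returns non-zero when Jcc with low nibble `cc` (opcode 0x70+cc) is taken. */
    static int jcc_taken(uint8_t cc, uint32_t eflags)
    {
        int r;
        switch (cc >> 1) {
            case 0: r = eflags & OF; break;                          /* JO  / JNO */
            case 1: r = eflags & CF; break;                          /* JB  / JNB */
            case 2: r = eflags & ZF; break;                          /* JZ  / JNZ */
            case 3: r = eflags & (CF | ZF); break;                   /* JBE / JA  */
            case 4: r = eflags & SF; break;                          /* JS  / JNS */
            case 5: r = eflags & PF; break;                          /* JP  / JNP */
            case 6: r = !!(eflags & SF) != !!(eflags & OF); break;   /* JL  / JGE */
            default: r = (eflags & ZF) ||
                         (!!(eflags & SF) != !!(eflags & OF)); break; /* JLE / JG */
        }
        return (cc & 1) ? !r : !!r;   /* odd opcodes negate the condition */
    }
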
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index 6f1b7f7c..72c10f90 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -89,7 +89,9 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGD;
             GETED(0);
             emit_xor32(dyn, ninst, rex, ed, gd, x3, x4);
-            WBACK;
+            if(ed!=gd) {
+                WBACK;
+            }
             break;
         case 0x39:
             INST_NAME("CMP Ed, Gd");
@@ -127,6 +129,37 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ADDI(xRSP, xRSP, 8);
             }
             break;
+
+        #define GO(GETFLAGS, NO, YES, F)                                \
+            READFLAGS(F);                                               \
+            i8 = F8S;                                                   \
+            BARRIER(BARRIER_MAYBE);                                     \
+            JUMP(addr+i8, 1);                                           \
+            GETFLAGS;                                                   \
+            if(dyn->insts[ninst].x64.jmp_insts==-1 ||                   \
+                CHECK_CACHE()) {                                        \
+                /* out of the block */                                  \
+                i32 = dyn->insts[ninst].epilog-(dyn->native_size);      \
+                NO(x1, i32);                                            \
+                if(dyn->insts[ninst].x64.jmp_insts==-1) {               \
+                    if(!(dyn->insts[ninst].x64.barrier&BARRIER_FLOAT))  \
+                        fpu_purgecache(dyn, ninst, 1, x1, x2, x3);      \
+                    jump_to_next(dyn, addr+i8, 0, ninst);               \
+                } else {                                                \
+                    CacheTransform(dyn, ninst, cacheupd, x1, x2, x3);   \
+                    i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);\
+                    B(i32);                                             \
+                }                                                       \
+            } else {                                                    \
+                /* inside the block */                                  \
+                i32 = dyn->insts[dyn->insts[ninst].x64.jmp_insts].address-(dyn->native_size);    \
+                YES(x1, i32);                                           \
+            }
+
+        GOCOND(0x70, "J", "ib");
+
+        #undef GO
+
         case 0x80:
             nextop = F8;
             switch((nextop>>3)&7) {
@@ -314,17 +347,22 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
                 if (eb2) {
                     // load a mask to x3 (ffffffffffff00ff)
-                    LUI(x3, 0xffff0);
-                    ADDI(x3, x3, 0xff);
+                    LUI(x3, 0xffffffffffff0);
+                    ORI(x3, x3, 0xff);
                     // apply mask
                     AND(eb1, eb1, x3);
-                    ADDI(x4, xZR, u8);
-                    SLLI(x4, x4, 8);
-                    OR(eb1, eb1, x4);
+                    if(u8) {
+                        if((u8<<8)<2048) {
+                            ADDI(x4, xZR, u8<<8);
+                        } else {
+                            ADDI(x4, xZR, u8);
+                            SLLI(x4, x4, 8);
+                        }
+                        OR(eb1, eb1, x4);
+                    }
                 } else {
-                    SRLI(eb1, eb1, 8);
-                    SLLI(eb1, eb1, 8);
-                    ADDI(eb1, eb1, u8);
+                    ANDI(eb1, eb1, 0xf00);  // mask ffffffffffffff00
+                    ORI(eb1, eb1, u8);
                 }
             } else {                    // mem <= u8
                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, &lock, 0, 1);
@@ -378,6 +416,8 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     //x87_forget(dyn, ninst, x3, x4, 0);
                     //sse_purge07cache(dyn, ninst, x3);
                     tmp = isSimpleWrapper(*(wrapper_t*)(addr));
+                    if(tmp<0 || tmp>1)
+                        tmp=0;  //TODO: removed when FP is in place
                     if((box64_log<2 && !cycle_log) && tmp) {
                         //GETIP(ip+3+8+8); // read the 0xCC
                         call_n(dyn, ninst, *(void**)(addr+8), tmp);
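
Note on the MOV-to-high-byte rewrite above (editorial sketch, not part of the patch): RISC-V I-type immediates are 12 bits and sign-extended, which explains both the `(u8<<8)<2048` guard and the comment that ANDI with 0xf00 applies the mask ffffffffffffff00. The helper below is illustrative, not box64 API.

    #include <stdint.h>

    /* Does a constant fit the sign-extended 12-bit immediate of RISC-V
     * I-type instructions (ADDI/ORI/ANDI)? */
    static int fits_simm12(int64_t v)
    {
        return v >= -2048 && v <= 2047;
    }
    /* So ADDI(x4, xZR, u8<<8) is only usable while u8 < 8; larger bytes take
     * the ADDI-then-SLLI fallback above. The same sign extension is why the
     * 12-bit pattern 0xf00 behaves as the 64-bit mask 0xffffffffffffff00. */
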
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_logic.c b/src/dynarec/rv64/dynarec_rv64_emit_logic.c
index b36867f8..0f7527d9 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_logic.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_logic.c
@@ -37,7 +37,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 
     // test sign bit before zeroup.
     IFX(X_SF) {
-        BGE(s1, xZR, 4);
+        BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     if (!rex.w) {
@@ -49,7 +49,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 
     IFX(X_ZF) {
-        BNEZ(s1, 4);
+        BNEZ(s1, 8);
         ORI(xFlags, xFlags, F_ZF);
     }
     IFX(X_PF) {
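
Note on the recurring 4 -> 8 offsets here and in the emit_math/emit_shift/emit_tests changes that follow (editorial sketch, not part of the patch): RISC-V branch immediates are byte offsets relative to the branch itself, and every instruction emitted here is 4 bytes, so skipping the single ORI after the branch needs an offset of 8; an offset of 4 lands on the ORI and the conditional skip becomes a no-op. The helper name below is invented.

    /* Byte offset that skips `n` fixed-width (4-byte) instructions after a
     * RISC-V branch; illustrative only, not box64 API. */
    static inline int skip_insns(int n)
    {
        return 4 * (n + 1);   /* the branch itself occupies the first 4 bytes */
    }
    /* skip_insns(1) == 8, matching the corrected BEQZ/BNEZ/BGE offsets. */
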
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c
index b2e9be9c..a201d639 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_math.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c
@@ -45,12 +45,12 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
             SRLI(s5, s5, 0x20);
             ADD(s5, s5, s4); // hi
             SRAI(s5, s5, 0x20);
-            BEQZ(s5, 4);
+            BEQZ(s5, 8);
             ORI(xFlags, xFlags, 1 << F_CF);
         } else {
             ADD(s5, s1, s2);
             SRLI(s5, s5, 0x20);
-            BEQZ(s5, 4);
+            BEQZ(s5, 8);
             ORI(xFlags, xFlags, 1 << F_CF);
         }
     }
@@ -70,7 +70,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 4);
+            BEQZ(s4, 8);
             ORI(xFlags, xFlags, 1 << F_AF);
         }
         IFX(X_OF) {
@@ -78,12 +78,12 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 4);
+            BEQZ(s3, 8);
             ORI(xFlags, xFlags, 1 << F_OF2);
         }
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 4);
+        BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     if (!rex.w) {
@@ -93,7 +93,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         emit_pf(dyn, ninst, s1, s3, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 4);
+        BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
 }
@@ -134,12 +134,12 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
             SRLI(s5, s5, 0x20);
             ADD(s5, s5, s4); // hi
             SRAI(s5, s5, 0x20);
-            BEQZ(s5, 4);
+            BEQZ(s5, 8);
             ORI(xFlags, xFlags, 1 << F_CF);
         } else {
             ADD(s5, s1, s2);
             SRLI(s5, s5, 0x20);
-            BEQZ(s5, 4);
+            BEQZ(s5, 8);
             ORI(xFlags, xFlags, 1 << F_CF);
         }
     }
@@ -164,7 +164,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 4);
+            BEQZ(s4, 8);
             ORI(xFlags, xFlags, 1 << F_AF);
         }
         IFX(X_OF) {
@@ -172,12 +172,12 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 4);
+            BEQZ(s3, 8);
             ORI(xFlags, xFlags, 1 << F_OF2);
         }
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 4);
+        BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     if (!rex.w) {
@@ -187,7 +187,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         emit_pf(dyn, ninst, s1, s3, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 4);
+        BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
 }
@@ -215,7 +215,7 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 4);
+        BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     if (!rex.w) {
@@ -223,7 +223,7 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
     CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
     IFX(X_ZF) {
-        BEQZ(s1, 4);
+        BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_PF) {
@@ -279,7 +279,7 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 4);
+        BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     if (!rex.w) {
@@ -287,7 +287,7 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
     }
     CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
     IFX(X_ZF) {
-        BEQZ(s1, 4);
+        BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_PF) {
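
The ZF edits above are a polarity fix rather than an offset fix: ZF must be set only when the result is zero, so the branch that skips the ORI has to be taken on a non-zero result (BNEZ), not on zero. A C-level sketch of the intended behavior follows; the helper name is invented and F_ZF is assumed to be the usual x86 ZF position.

    #include <stdint.h>

    #define F_ZF 6   /* assumed x86 EFLAGS position, as used by xFlags */

    static uint64_t set_zf_from_result(uint64_t xFlags, uint64_t res)
    {
        /* Emitted as: BNEZ(res, 8); ORI(xFlags, xFlags, 1 << F_ZF);
         * i.e. the ORI is skipped whenever the result is non-zero. */
        if (res == 0)
            xFlags |= 1ull << F_ZF;
        return xFlags;
    }
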
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index 36c5c192..db038011 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -46,11 +46,11 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         if (c > 1) {
             SRAI(s3, s1, c-1);
             ANDI(s3, s3, 1); // LSB
-            BEQZ(s3, 4);
+            BEQZ(s3, 8);
         } else {
             // no need to shift
             ANDI(s3, s1, 1);
-            BEQZ(s3, 4);
+            BEQZ(s3, 8);
         }
         ORI(xFlags, xFlags, 1 << F_CF);
     }
@@ -58,7 +58,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     SRLIxw(s1, s1, c);
 
     IFX(X_SF) {
-        BGE(s1, xZR, 4);
+        BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     if (!rex.w) {
@@ -68,7 +68,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 4);
+        BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_OF) {
@@ -77,7 +77,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
             SRLI(s4, s1, rex.w?63:31);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1);
-            BEQZ(s3, 4);
+            BEQZ(s3, 8);
             ORI(xFlags, xFlags, 1 << F_OF2);
         }
     }
@@ -109,11 +109,11 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         if (c > 1) {
             SRAI(s3, s1, c-1);
             ANDI(s3, s3, 1); // LSB
-            BEQZ(s3, 4);
+            BEQZ(s3, 8);
         } else {
             // no need to shift
             ANDI(s3, s1, 1);
-            BEQZ(s3, 4);
+            BEQZ(s3, 8);
         }
         ORI(xFlags, xFlags, 1 << F_CF);
     }
@@ -122,7 +122,7 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
 
     // SRAIW sign-extends, so test sign bit before clearing upper bits
     IFX(X_SF) {
-        BGE(s1, xZR, 4);
+        BGE(s1, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     if (!rex.w) {
@@ -132,7 +132,7 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 4);
+        BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_PF) {
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_tests.c b/src/dynarec/rv64/dynarec_rv64_emit_tests.c
index 0b85d831..59f2b495 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_tests.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_tests.c
@@ -24,7 +24,8 @@
 #include "dynarec_rv64_helper.h"
 
 // emit CMP8 instruction, from cmp s1, s2, using s3, s4, s5 and s6 as scratch
-void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6) {
+void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5, int s6)
+{
     CLEAR_FLAGS();
     IFX_PENDOR0 {
         SB(s1, xEmu, offsetof(x64emu_t, op1));
@@ -47,12 +48,12 @@ void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
     }
     IFX(X_SF) {
         SRLI(s3, s6, 7);
-        BEQZ(s3, 4);
+        BEQZ(s3, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 8);
     IFX(X_ZF) {
-        BEQZ(s6, 4);
+        BNEZ(s6, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_PF) {
@@ -61,7 +62,8 @@ void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
 }
 
 // emit CMP8 instruction, from cmp s1 , 0, using s3 and s4 as scratch
-void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) {
+void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4)
+{
     CLEAR_FLAGS();
     IFX_PENDOR0 {
         SB(s1, xEmu, offsetof(x64emu_t, op1));
@@ -74,11 +76,11 @@ void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4) {
 
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 4);
+        BEQZ(s3, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     IFX(X_ZF) {
-        BEQZ(s1, 4);
+        BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_PF) {
@@ -109,15 +111,15 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SDxw(s6, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_SF) {
-        BGE(s6, xZR, 0);
+        BGE(s6, xZR, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     if (!rex.w) {
-        ZEROUP(s1);
+        ZEROUP(s6);
     }
     CALC_SUB_FLAGS(s5, s2, s6, s3, s4, rex.w?64:32);
     IFX(X_ZF) {
-        BEQZ(s6, 4);
+        BNEZ(s6, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_PF) {
@@ -128,6 +130,7 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 // emit CMP32 instruction, from cmp s1, 0, using s3 and s4 as scratch
 void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4)
 {
+    CLEAR_FLAGS();
     IFX_PENDOR0 {
         SD(s1, xEmu, offsetof(x64emu_t, op1));
         SD(xZR, xEmu, offsetof(x64emu_t, op2));
@@ -138,15 +141,15 @@ void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int
     }
     IFX(X_SF) {
         if (rex.w) {
-            BGE(s1, xZR, 4);
+            BGE(s1, xZR, 8);
         } else {
             SRLI(s3, s1, 31);
-            BEQZ(s3, 4);
+            BEQZ(s3, 8);
         }
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     IFX(X_ZF) {
-        BEQZ(s1, 4);
+        BNEZ(s1, 8);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_PF) {
@@ -172,12 +175,12 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
     IFX(X_SF) {
         if (!rex.w) ZEROUP(s3);
         SRLI(s4, s3, rex.w?63:31);
-        BEQZ(s4, 4);
+        BEQZ(s4, 8);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
     IFX(X_ZF) {
-        BNEZ(s3, 4);
-        ORI(xFlags, xFlags, F_ZF);
+        BNEZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s3, s4, s5);
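
Two fixes above are easy to miss: ZEROUP now targets s6, the scratch that actually holds the comparison result used by the 32-bit flag math, instead of the untouched source s1; and emit_cmp32_0 now starts with CLEAR_FLAGS() so bits set by an earlier instruction cannot leak into the freshly computed ones. A C-level sketch of that second point; the helper name is invented, the bit positions are assumed to follow x86 EFLAGS, and OF/AF/PF handling is elided (emit_pf() and the OF logic cover that in the real emitter).

    #include <stdint.h>

    enum { F_CF = 0, F_PF = 2, F_AF = 4, F_ZF = 6, F_SF = 7 }; /* assumed layout */

    static uint64_t cmp32_0_flags(uint64_t xFlags, int32_t s1)
    {
        /* CLEAR_FLAGS(): start from a clean slate so stale bits cannot survive. */
        xFlags &= ~((1ull<<F_CF)|(1ull<<F_PF)|(1ull<<F_AF)|(1ull<<F_ZF)|(1ull<<F_SF));
        if (s1 < 0)  xFlags |= 1ull << F_SF;
        if (s1 == 0) xFlags |= 1ull << F_ZF;
        return xFlags;
    }
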
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.c b/src/dynarec/rv64/dynarec_rv64_functions.c
index 8df799bb..681d20d3 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.c
+++ b/src/dynarec/rv64/dynarec_rv64_functions.c
@@ -107,6 +107,55 @@ uintptr_t fakeed(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop)
 #undef F8
 #undef F32
 
+static int fpuCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst)
+{
+    // TODO
+    return 0;
+}
+
+static int flagsCacheNeedsTransform(dynarec_rv64_t* dyn, int ninst)
+{
+    int jmp = dyn->insts[ninst].x64.jmp_insts;
+    if(jmp<0)
+        return 0;
+    if(dyn->insts[ninst].f_exit.dfnone)  // flags are fully known, nothing we can do more
+        return 0;
+/*    if((dyn->f.pending!=SF_SET)
+    && (dyn->f.pending!=SF_SET_PENDING)) {
+        if(dyn->f.pending!=SF_PENDING) {*/
+    switch (dyn->insts[jmp].f_entry.pending) {
+        case SF_UNKNOWN: return 0;
+        case SF_SET: 
+            if(dyn->insts[ninst].f_exit.pending!=SF_SET && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING) 
+                return 1; 
+            else 
+                return 0;
+        case SF_SET_PENDING:
+            if(dyn->insts[ninst].f_exit.pending!=SF_SET 
+            && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING
+            && dyn->insts[ninst].f_exit.pending!=SF_PENDING) 
+                return 1; 
+            else 
+                return 0;
+        case SF_PENDING:
+            if(dyn->insts[ninst].f_exit.pending!=SF_SET 
+            && dyn->insts[ninst].f_exit.pending!=SF_SET_PENDING
+            && dyn->insts[ninst].f_exit.pending!=SF_PENDING)
+                return 1;
+            else
+                return (dyn->insts[jmp].f_entry.dfnone  == dyn->insts[ninst].f_exit.dfnone)?0:1;
+    }
+    if(dyn->insts[jmp].f_entry.dfnone && !dyn->insts[ninst].f_exit.dfnone)
+        return 1;
+    return 0;
+}
+int CacheNeedsTransform(dynarec_rv64_t* dyn, int ninst) {
+    int ret = 0;
+    if (fpuCacheNeedsTransform(dyn, ninst)) ret|=1;
+    if (flagsCacheNeedsTransform(dyn, ninst)) ret|=2;
+    return ret;
+}
+
 int isNativeCall(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn)
 {
     (void)dyn;
diff --git a/src/dynarec/rv64/dynarec_rv64_functions.h b/src/dynarec/rv64/dynarec_rv64_functions.h
index ed94ac14..2fb76659 100644
--- a/src/dynarec/rv64/dynarec_rv64_functions.h
+++ b/src/dynarec/rv64/dynarec_rv64_functions.h
@@ -16,6 +16,9 @@ uintptr_t fakeed(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, uint8_t nextop)
 // Is what pointed at addr a native call? And if yes, to what function?
 int isNativeCall(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn);
 
+// FPU Cache transformation (for loops)
+int CacheNeedsTransform(dynarec_rv64_t* dyn, int i1);
+
 // predecessor access
 int isPred(dynarec_rv64_t* dyn, int ninst, int pred);
 int getNominalPred(dynarec_rv64_t* dyn, int ninst);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.c b/src/dynarec/rv64/dynarec_rv64_helper.c
index 02be87d5..4ae11186 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.c
+++ b/src/dynarec/rv64/dynarec_rv64_helper.c
@@ -517,7 +517,65 @@ void fpu_popcache(dynarec_rv64_t* dyn, int ninst, int s1, int not07)
     //TODO
 }
 
-void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val)
+static void fpuCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
+{
+    //TODO
+}
+static void flagsCacheTransform(dynarec_rv64_t* dyn, int ninst, int s1)
+{
+#if STEP > 1
+    int j64;
+    int jmp = dyn->insts[ninst].x64.jmp_insts;
+    if(jmp<0)
+        return;
+    if(dyn->f.dfnone)  // flags are fully known, nothing we can do more
+        return;
+    MESSAGE(LOG_DUMP, "\tFlags fetch ---- ninst=%d -> %d\n", ninst, jmp);
+    int go = 0;
+    switch (dyn->insts[jmp].f_entry.pending) {
+        case SF_UNKNOWN: break;
+        case SF_SET: 
+            if(dyn->f.pending!=SF_SET && dyn->f.pending!=SF_SET_PENDING) 
+                go = 1; 
+            break;
+        case SF_SET_PENDING:
+            if(dyn->f.pending!=SF_SET 
+            && dyn->f.pending!=SF_SET_PENDING
+            && dyn->f.pending!=SF_PENDING) 
+                go = 1; 
+            break;
+        case SF_PENDING:
+            if(dyn->f.pending!=SF_SET 
+            && dyn->f.pending!=SF_SET_PENDING
+            && dyn->f.pending!=SF_PENDING)
+                go = 1;
+            else
+                go = (dyn->insts[jmp].f_entry.dfnone  == dyn->f.dfnone)?0:1;
+            break;
+    }
+    if(dyn->insts[jmp].f_entry.dfnone && !dyn->f.dfnone)
+        go = 1;
+    if(go) {
+        if(dyn->f.pending!=SF_PENDING) {
+            LW(s1, xEmu, offsetof(x64emu_t, df));
+            j64 = (GETMARK3)-(dyn->native_size);
+            BEQZ(s1, j64);
+        }
+        CALL_(UpdateFlags, -1, 0);
+        MARK3;
+    }
+#endif
+}
+
+void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) {
+    if(cacheupd&1)
+        fpuCacheTransform(dyn, ninst, s1, s2, s3);
+    if(cacheupd&2)
+        flagsCacheTransform(dyn, ninst, s1);
+}
+
+
+void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup)
 {
     // Depending on val, the following insns are emitted.
     // val == 0               -> ADDI
@@ -533,13 +591,16 @@ void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val)
         src = reg;
     }
     if (lo12 || !hi20) ADDI(reg, src, lo12);
+    if(zeroup && ((hi20&0x80000) || (!hi20 && (lo12&0x800)))) {
+        ZEROUP(reg);
+    }
 }
 
 void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val)
 {
     if(((val<<32)>>32)==val) {
         // 32bits value
-        rv64_move32(dyn, ninst, reg, val);
+        rv64_move32(dyn, ninst, reg, val, 0);
         return;
     }
 
@@ -568,6 +629,6 @@ void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4)
     SRLW(s4, s4, s1);
     ANDI(s4, s4, 1);
 
-    BEQZ(s4, 4);
+    BEQZ(s4, 8);
     ORI(xFlags, xFlags, 1 << F_PF);
 }
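
Context for the new zeroup parameter (editorial sketch, not part of the patch): LUI and ADDI both sign-extend on RV64, so materializing some 32-bit constants leaves all-ones in bits 63:32, and rv64_move32 now clears the upper half only when the LUI/ADDI split would produce that; MOV32w in rv64_emitter.h is changed accordingly to delegate the decision. The added condition, restated as plain C with an invented helper name and the usual hi20/lo12 split assumed:

    #include <stdint.h>

    static int needs_zeroup(int32_t val)
    {
        int64_t v = val;
        int32_t hi20 = (int32_t)(((v + 0x800) >> 12) & 0xfffff);
        int32_t lo12 = (int32_t)(v & 0xfff);
        return (hi20 & 0x80000)            /* LUI result sign-extends to a negative */
            || (!hi20 && (lo12 & 0x800));  /* bare ADDI with a negative immediate   */
    }
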
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 55869194..83e9c8cc 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -69,7 +69,7 @@
                 } else {                                \
                     SMREAD()                            \
                     addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, D); \
-                    LD(x1, wback, fixedaddress);    \
+                    LDxw(x1, wback, fixedaddress);      \
                     ed = x1;                            \
                 }
 
@@ -180,8 +180,8 @@
     LOAD_REG(R11);          \
 
 
-#define SET_DFNONE(S)    if(!dyn->f.dfnone) {MOV_U12(S, d_none); SD(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;}
-#define SET_DF(S, N)     if((N)!=d_none) {MOV_U12(S, (N)); SD(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=0;} else SET_DFNONE(S)
+#define SET_DFNONE(S)    if(!dyn->f.dfnone) {MOV_U12(S, d_none); SW(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;}
+#define SET_DF(S, N)     if((N)!=d_none) {MOV_U12(S, (N)); SW(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=0;} else SET_DFNONE(S)
 #define SET_NODF()          dyn->f.dfnone = 0
 #define SET_DFOK()          dyn->f.dfnone = 1
 
@@ -198,27 +198,27 @@
         IFX(X_AF) {                                                       \
             /* af = bc & 0x8 */                                           \
             ANDI(scratch1, scratch2, 8);                                  \
-            BEQZ(scratch1, 4);                                            \
+            BEQZ(scratch1, 8);                                            \
             ORI(xFlags, xFlags, 1 << F_AF);                               \
         }                                                                 \
         IFX(X_CF) {                                                       \
             /* cf = bc & (1<<(width-1)) */                                \
-            if (width == 8) {                                             \
+            if ((width) == 8) {                                           \
                 ANDI(scratch1, scratch2, 0x80);                           \
             } else {                                                      \
-                SRLI(scratch1, scratch2, width-1);                        \
-                if (width == 16) ANDI(scratch1, scratch1, 1);             \
+                SRLI(scratch1, scratch2, (width)-1);                      \
+                if ((width) == 16) ANDI(scratch1, scratch1, 1);           \
             }                                                             \
-            BEQZ(scratch1, 4);                                            \
+            BEQZ(scratch1, 8);                                            \
             ORI(xFlags, xFlags, 1 << F_CF);                               \
         }                                                                 \
         IFX(X_OF) {                                                       \
             /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */     \
-            SRLI(scratch1, scratch2, width-2);                            \
+            SRLI(scratch1, scratch2, (width)-2);                          \
             SRLI(scratch2, scratch1, 1);                                  \
             XOR(scratch1, scratch1, scratch2);                            \
             ANDI(scratch1, scratch1, 1);                                  \
-            BEQZ(scratch1, 4);                                            \
+            BEQZ(scratch1, 8);                                            \
             ORI(xFlags, xFlags, 1 << F_OF2);                              \
         }                                                                 \
     }
@@ -611,10 +611,10 @@ void emit_pf(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
 // Set rounding according to mxcsr flags, return reg to restore flags
 //int sse_setround(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3);
 
-//void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3);
+void CacheTransform(dynarec_rv64_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3);
 
 void rv64_move64(dynarec_rv64_t* dyn, int ninst, int reg, int64_t val);
-void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val);
+void rv64_move32(dynarec_rv64_t* dyn, int ninst, int reg, int32_t val, int zeroup);
 
 #if STEP < 2
 #define CHECK_CACHE()   0
@@ -680,4 +680,99 @@ uintptr_t dynarec64_F30F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
 #define MAYUSE(A)
 #endif
 
+// GOCOND will use x1 and x3
+#define GOCOND(B, T1, T2)                                   \
+    case B+0x0:                                             \
+        INST_NAME(T1 "O " T2);                              \
+        GO( ANDI(x1, xFlags, 1<<F_OF2)                      \
+            , BEQZ, BNEZ, X_OF)                             \
+        break;                                              \
+    case B+0x1:                                             \
+        INST_NAME(T1 "NO " T2);                             \
+        GO( ANDI(x1, xFlags, 1<<F_OF2)                      \
+            , BNEZ, BEQZ, X_OF)                             \
+        break;                                              \
+    case B+0x2:                                             \
+        INST_NAME(T1 "C " T2);                              \
+        GO( ANDI(x1, xFlags, 1<<F_CF)                       \
+            , BEQZ, BNEZ, X_CF)                             \
+        break;                                              \
+    case B+0x3:                                             \
+        INST_NAME(T1 "NC " T2);                             \
+        GO( ANDI(x1, xFlags, 1<<F_CF)                       \
+            , BNEZ, BEQZ, X_CF)                             \
+        break;                                              \
+    case B+0x4:                                             \
+        INST_NAME(T1 "Z " T2);                              \
+        GO( ANDI(x1, xFlags, 1<<F_ZF)                       \
+            , BEQZ, BNEZ, X_ZF)                             \
+        break;                                              \
+    case B+0x5:                                             \
+        INST_NAME(T1 "NZ " T2);                             \
+        GO( ANDI(x1, xFlags, 1<<F_ZF)                       \
+            , BNEZ, BEQZ, X_ZF)                             \
+        break;                                              \
+    case B+0x6:                                             \
+        INST_NAME(T1 "BE " T2);                             \
+        GO( ANDI(x1, xFlags, (1<<F_CF)|(1<<F_ZF))           \
+            , BEQZ, BNEZ, X_CF|X_ZF)                        \
+        break;                                              \
+    case B+0x7:                                             \
+        INST_NAME(T1 "NBE " T2);                            \
+        GO( ANDI(x1, xFlags, (1<<F_CF)|(1<<F_ZF))           \
+            , BNEZ, BEQZ, X_CF|X_ZF)                        \
+        break;                                              \
+    case B+0x8:                                             \
+        INST_NAME(T1 "S " T2);                              \
+        GO( ANDI(x1, xFlags, 1<<F_SF)                       \
+            , BEQZ, BNEZ, X_SF)                             \
+        break;                                              \
+    case B+0x9:                                             \
+        INST_NAME(T1 "NS " T2);                             \
+        GO( ANDI(x1, xFlags, 1<<F_SF)                       \
+            , BNEZ, BEQZ, X_SF)                             \
+        break;                                              \
+    case B+0xA:                                             \
+        INST_NAME(T1 "P " T2);                              \
+        GO( ANDI(x1, xFlags, 1<<F_PF)                       \
+            , BEQZ, BNEZ, X_PF)                             \
+        break;                                              \
+    case B+0xB:                                             \
+        INST_NAME(T1 "NP " T2);                             \
+        GO( ANDI(x1, xFlags, 1<<F_PF)                       \
+            , BNEZ, BEQZ, X_PF)                             \
+        break;                                              \
+    case B+0xC:                                             \
+        INST_NAME(T1 "L " T2);                              \
+        GO( SRLI(x1, xFlags, F_SF-F_OF2);                   \
+            XOR(x1, x1, xFlags);                            \
+            ANDI(x1, x1, 1<<F_OF2)                          \
+            , BEQZ, BNEZ, X_SF|X_OF)                        \
+        break;                                              \
+    case B+0xD:                                             \
+        INST_NAME(T1 "GE " T2);                             \
+        GO( SRLI(x1, xFlags, F_SF-F_OF2);                   \
+            XOR(x1, x1, xFlags);                            \
+            ANDI(x1, x1, 1<<F_OF2)                          \
+            , BNEZ, BEQZ, X_SF|X_OF)                        \
+        break;                                              \
+    case B+0xE:                                             \
+        INST_NAME(T1 "LE " T2);                             \
+        GO( SRLI(x1, xFlags, F_SF-F_OF2);                   \
+            XOR(x1, x1, xFlags);                            \
+            ANDI(x3, xFlags, 1<<F_ZF);                      \
+            OR(x1, x1, x3);                                 \
+            ANDI(x1, x1, (1<<F_OF2) | (1<<F_ZF))            \
+            , BEQZ, BNEZ, X_SF|X_OF|X_ZF)                   \
+        break;                                              \
+    case B+0xF:                                             \
+        INST_NAME(T1 "G " T2);                              \
+        GO( SRLI(x1, xFlags, F_SF-F_OF2);                   \
+            XOR(x1, x1, xFlags);                            \
+            ANDI(x3, xFlags, 1<<F_ZF);                      \
+            OR(x1, x1, x3);                                 \
+            ANDI(x1, x1, (1<<F_OF2) | (1<<F_ZF))            \
+            , BNEZ, BEQZ, X_SF|X_OF|X_ZF)                   \
+        break
+
 #endif //__DYNAREC_RV64_HELPER_H__
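
The parentheses added around `width` in CALC_SUB_FLAGS above are not cosmetic: the macro is called with `rex.w?64:32`, so an unparenthesized `width == 16` expands to `rex.w ? 64 : 32 == 16`, which parses as `rex.w ? 64 : (32 == 16)` and is simply 64 whenever rex.w is set. A minimal reproduction (names invented):

    #include <stdio.h>

    #define IS16_BAD(width)  (width == 16)
    #define IS16_OK(width)   ((width) == 16)

    int main(void)
    {
        int rex_w = 1;
        /* The buggy form yields 64 here, not a 0/1 comparison result. */
        printf("bad: %d  ok: %d\n",
               IS16_BAD(rex_w ? 64 : 32), IS16_OK(rex_w ? 64 : 32));
        return 0;
    }
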
diff --git a/src/dynarec/rv64/dynarec_rv64_pass2.h b/src/dynarec/rv64/dynarec_rv64_pass2.h
index 3255dcbf..408c3e97 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass2.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass2.h
@@ -6,7 +6,7 @@
         }
 
 #define MESSAGE(A, ...)  
-#define EMIT(A)     dyn->insts[ninst].size+=4; dyn->native_size+=4
+#define EMIT(A)     do {dyn->insts[ninst].size+=4; dyn->native_size+=4;}while(0)
 #define NEW_INST                                                                                        \
         if(ninst) {                                                                                     \
                 dyn->insts[ninst].address = (dyn->insts[ninst-1].address+dyn->insts[ninst-1].size);     \
diff --git a/src/dynarec/rv64/dynarec_rv64_pass3.h b/src/dynarec/rv64/dynarec_rv64_pass3.h
index 1d7eb6d5..93b5411e 100644
--- a/src/dynarec/rv64/dynarec_rv64_pass3.h
+++ b/src/dynarec/rv64/dynarec_rv64_pass3.h
@@ -4,10 +4,12 @@
         addInst(dyn->instsize, &dyn->insts_size, dyn->insts[ninst].x64.size, dyn->insts[ninst].size/4); \
     addInst(dyn->instsize, &dyn->insts_size, 0, 0);
 #define EMIT(A)     \
-    if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), rv64_print(A, (uintptr_t)dyn->block));} \
-    *(uint32_t*)(dyn->block) = (uint32_t)(A);       \
-    dyn->block += 4; dyn->native_size += 4;         \
-    dyn->insts[ninst].size2 += 4
+    do {                                                \
+        if(box64_dynarec_dump) {dynarec_log(LOG_NONE, "\t%08x\t%s\n", (uint32_t)(A), rv64_print(A, (uintptr_t)dyn->block));} \
+        *(uint32_t*)(dyn->block) = (uint32_t)(A);       \
+        dyn->block += 4; dyn->native_size += 4;         \
+        dyn->insts[ninst].size2 += 4;                   \
+    }while(0)
 
 #define MESSAGE(A, ...)  if(box64_dynarec_dump) dynarec_log(LOG_NONE, __VA_ARGS__)
 #define NEW_INST        \
@@ -35,16 +37,16 @@
             dyn->smread, dyn->smwrite);         \
         if(dyn->insts[ninst].pred_sz) {         \
             dynarec_log(LOG_NONE, ", pred=");   \
-            for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)\
-                dynarec_log(LOG_NONE, "%s%d", ii?"/":"", dyn->insts[ninst].pred[ii]);\
-        }                                       \
-        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts>=0)\
-            dynarec_log(LOG_NONE, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts);\
-        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts==-1)\
-            dynarec_log(LOG_NONE, ", jmp=out"); \
-        if(dyn->last_ip)                        \
-            dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip);\
-        dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":"");                       \
+            for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)                           \
+                dynarec_log(LOG_NONE, "%s%d", ii?"/":"", dyn->insts[ninst].pred[ii]);   \
+        }                                                                               \
+        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts>=0)             \
+            dynarec_log(LOG_NONE, ", jmp=%d", dyn->insts[ninst].x64.jmp_insts);         \
+        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts==-1)            \
+            dynarec_log(LOG_NONE, ", jmp=out");                                         \
+        if(dyn->last_ip)                                                                \
+            dynarec_log(LOG_NONE, ", last_ip=%p", (void*)dyn->last_ip);                 \
+        dynarec_log(LOG_NONE, "%s\n", (box64_dynarec_dump>1)?"\e[m":"");                \
     }
 
 #define TABLE64(A, V)   {int val64offset = Table64(dyn, (V)); MESSAGE(LOG_DUMP, "  Table64: 0x%lx\n", (V)); AUIPC(A, SPLIT20(val64offset)); LD(A, A, SPLIT12(val64offset));}
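
Wrapping the multi-statement EMIT macros in pass2 and pass3 in do { ... } while(0) is the usual macro-hygiene fix: without it, an unbraced `if (...) EMIT(op);` only guards the first statement of the expansion. A minimal illustration (names invented, not box64 code):

    static int total, count;

    #define EMIT_BAD(n)  total += (n); count += 1
    #define EMIT_OK(n)   do { total += (n); count += 1; } while(0)

    static void demo(int cond)
    {
        if (cond)
            EMIT_BAD(4);   /* only `total += 4;` is guarded; `count += 1;` always runs */
        if (cond)
            EMIT_OK(4);    /* the whole expansion behaves as one statement */
    }
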
diff --git a/src/dynarec/rv64/rv64_emitter.h b/src/dynarec/rv64/rv64_emitter.h
index 32988c19..5f5cfe0c 100644
--- a/src/dynarec/rv64/rv64_emitter.h
+++ b/src/dynarec/rv64/rv64_emitter.h
@@ -110,7 +110,7 @@ f28–31  ft8–11  FP temporaries                  Caller
 
 // MOV64x/MOV32w is quite complex, so use a function for this
 #define MOV64x(A, B)    rv64_move64(dyn, ninst, A, B)
-#define MOV32w(A, B)    do{ rv64_move32(dyn, ninst, A, B); if(A&0x80000000) ZEROUP(A); }while(0);
+#define MOV32w(A, B)    rv64_move32(dyn, ninst, A, B, 1)
 #define MOV64xw(A, B)   if(rex.w) {MOV64x(A, B);} else {MOV32w(A, B);}
 
 // ZERO the upper part
diff --git a/src/dynarec/rv64/rv64_epilog.S b/src/dynarec/rv64/rv64_epilog.S
index fcaf646a..9b97c232 100644
--- a/src/dynarec/rv64/rv64_epilog.S
+++ b/src/dynarec/rv64/rv64_epilog.S
@@ -32,7 +32,7 @@ rv64_epilog:
     slli    x5, x5, 11-5
     or      x8, x8, x5
     sd      x8, 128(a0)     //xFlags
-    sd      x7, 136(a0)     // put back reg value in emu, including EIP (so x27 must be EIP now)
+    sd      x7, 136(a0)     // put back reg value in emu, including EIP (so x7 must be EIP now)
     //restore all used register
     ld      ra, (sp)  // save ra
     ld      x8, 8(sp) // save fp