about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <liuyang22@iscas.ac.cn> 2024-11-12 02:53:55 +0800
committer GitHub <noreply@github.com> 2024-11-11 19:53:55 +0100
commit b02942c0b0d8491c2d28128c4c948710f47f94f9 (patch)
tree 36733af6467ee424b64b68ef80557b5ead630b6b /src
parent 56e813ccb784fb454cc314ac4f03b044569cd650 (diff)
download box64-b02942c0b0d8491c2d28128c4c948710f47f94f9.tar.gz
box64-b02942c0b0d8491c2d28128c4c948710f47f94f9.zip
[RV64_DYNAREC] Made eflags emulation branchless with xtheadcondmov (#2019)
* [RV64_DYNAREC] Made eflags emulation branchless with xtheadcondmov

* more
Diffstat (limited to 'src')
-rw-r--r-- src/core.c 5
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_0.c 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_2.c 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_00_3.c 4
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_64.c 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_66.c 6
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_67.c 2
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_logic.c 86
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_math.c 311
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_shift.c 174
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_emit_tests.c 66
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_helper.h 76
13 files changed, 299 insertions, 447 deletions
diff --git a/src/core.c b/src/core.c
index de004a9f..93bdd763 100644
--- a/src/core.c
+++ b/src/core.c
@@ -563,6 +563,9 @@ HWCAP2_AFP
     } else if (p != NULL && !strcasecmp(p, "xtheadmempair")) {
         RV64_Detect_Function();
         rv64_xtheadmempair = 0;
+    } else if (p != NULL && !strcasecmp(p, "xtheadcondmov")) {
+        RV64_Detect_Function();
+        rv64_xtheadcondmov = 0;
     }
 
     printf_log(LOG_INFO, "Dynarec for RISC-V ");
@@ -577,8 +580,8 @@ HWCAP2_AFP
     if(rv64_xtheadbb) printf_log(LOG_INFO, " XTheadBb");
     if(rv64_xtheadbs) printf_log(LOG_INFO, " XTheadBs");
     if (rv64_xtheadmempair) printf_log(LOG_INFO, " XTheadMemPair");
+    if (rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov");
     // Disable the display since these are only detected but never used.
-    // if(rv64_xtheadcondmov) printf_log(LOG_INFO, " XTheadCondMov");
     // if(rv64_xtheadmemidx) printf_log(LOG_INFO, " XTheadMemIdx");
     // if(rv64_xtheadfmemidx) printf_log(LOG_INFO, " XTheadFMemIdx");
     // if(rv64_xtheadmac) printf_log(LOG_INFO, " XTheadMac");
diff --git a/src/dynarec/rv64/dynarec_rv64_00_0.c b/src/dynarec/rv64/dynarec_rv64_00_0.c
index dcde6545..b9a6faf2 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_0.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_0.c
@@ -58,7 +58,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FAST_8BIT_OPERATION(wb, gb, x1, ADD(wb, wb, x1));
             GETEB(x1, 0);
             GETGB(x2);
-            emit_add8(dyn, ninst, x1, x2, x4, x5);
+            emit_add8(dyn, ninst, x1, x2, x4, x5, x6);
             EBBACK(x5, 0);
             break;
         case 0x01:
@@ -77,7 +77,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FAST_8BIT_OPERATION(gb, wb, x1, ADD(gb, gb, x1));
             GETEB(x1, 0);
             GETGB(x2);
-            emit_add8(dyn, ninst, x2, x1, x4, x5);
+            emit_add8(dyn, ninst, x2, x1, x4, x5, x6);
             GBBACK(x5);
             break;
         case 0x03:
@@ -93,7 +93,7 @@ uintptr_t dynarec64_00_0(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
             SETFLAGS(X_ALL, SF_SET_PENDING);
             u8 = F8;
             ANDI(x1, xRAX, 0xff);
-            emit_add8c(dyn, ninst, x1, u8, x3, x4, x5);
+            emit_add8c(dyn, ninst, x1, u8, x3, x4, x5, x6);
             ANDI(xRAX, xRAX, ~0xff);
             OR(xRAX, xRAX, x1);
             break;
diff --git a/src/dynarec/rv64/dynarec_rv64_00_2.c b/src/dynarec/rv64/dynarec_rv64_00_2.c
index 7339963f..3390be54 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_2.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_2.c
@@ -58,7 +58,7 @@ uintptr_t dynarec64_00_2(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEB(x1, 1);
                     u8 = F8;
-                    emit_add8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    emit_add8c(dyn, ninst, x1, u8, x2, x4, x5, x6);
                     EBBACK(x5, 0);
                     break;
                 case 1: // OR
diff --git a/src/dynarec/rv64/dynarec_rv64_00_3.c b/src/dynarec/rv64/dynarec_rv64_00_3.c
index cd2914cd..1a7157e8 100644
--- a/src/dynarec/rv64/dynarec_rv64_00_3.c
+++ b/src/dynarec/rv64/dynarec_rv64_00_3.c
@@ -1105,7 +1105,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("NEG Eb");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEB(x1, 0);
-                    emit_neg8(dyn, ninst, x1, x2, x4);
+                    emit_neg8(dyn, ninst, x1, x2, x4, x5);
                     EBBACK(x5, 0);
                     break;
                 case 4:
@@ -1175,7 +1175,7 @@ uintptr_t dynarec64_00_3(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     INST_NAME("NEG Ed");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETED(0);
-                    emit_neg32(dyn, ninst, rex, ed, x3, x4);
+                    emit_neg32(dyn, ninst, rex, ed, x3, x4, x5, x6);
                     WBACK;
                     break;
                 case 4:
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index 48682842..fd08d72b 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1762,7 +1762,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 GETED(1);
                 GETGD;
                 u8 = F8;
-                emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4);
+                emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4, x5);
                 WBACK;
             } else {
                 FAKEED;
@@ -1824,7 +1824,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 GETGD;
                 u8 = F8;
                 u8 &= (rex.w ? 0x3f : 0x1f);
-                emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4);
+                emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4, x5);
                 WBACK;
             } else {
                 FAKEED;
@@ -2251,7 +2251,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGB(x2);
             if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2)))
                 MV(x9, ed);
-            emit_add8(dyn, ninst, ed, gd, x4, x5);
+            emit_add8(dyn, ninst, ed, gd, x4, x5, x6);
             if (!(MODREG && wback == gb1 && !!(wb2) == !!(gb2)))
                 MV(gd, x9);
             EBBACK(x5, 0);
diff --git a/src/dynarec/rv64/dynarec_rv64_64.c b/src/dynarec/rv64/dynarec_rv64_64.c
index 168ee161..6f6deabf 100644
--- a/src/dynarec/rv64/dynarec_rv64_64.c
+++ b/src/dynarec/rv64/dynarec_rv64_64.c
@@ -165,7 +165,7 @@ uintptr_t dynarec64_64(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEBO(x1, 1);
                     u8 = F8;
-                    emit_add8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    emit_add8c(dyn, ninst, x1, u8, x2, x4, x5, x6);
                     EBBACK(x5, 0);
                     break;
                 case 1: // OR
diff --git a/src/dynarec/rv64/dynarec_rv64_66.c b/src/dynarec/rv64/dynarec_rv64_66.c
index 47d17d04..a4df72f7 100644
--- a/src/dynarec/rv64/dynarec_rv64_66.c
+++ b/src/dynarec/rv64/dynarec_rv64_66.c
@@ -378,7 +378,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
             gd = xRAX + (opcode&7);
             ZEXTH(x1, gd);
-            emit_inc16(dyn, ninst, x1, x2, x3, x4);
+            emit_inc16(dyn, ninst, x1, x2, x3, x4, x5);
             INSHz(gd, x1, x3, x4, 1, 0);
             break;
         case 0x48:
@@ -1196,7 +1196,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("NEG Ew");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
                     GETEW(x1, 0);
-                    emit_neg16(dyn, ninst, ed, x2, x4);
+                    emit_neg16(dyn, ninst, ed, x2, x4, x5);
                     EWBACK;
                     break;
                 case 4:
@@ -1289,7 +1289,7 @@ uintptr_t dynarec64_66(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     INST_NAME("INC Ew");
                     SETFLAGS(X_ALL&~X_CF, SF_SUBSET_PENDING);
                     GETEW(x1, 0);
-                    emit_inc16(dyn, ninst, x1, x2, x4, x5);
+                    emit_inc16(dyn, ninst, x1, x2, x4, x5, x6);
                     EWBACK;
                     break;
                 case 1:
diff --git a/src/dynarec/rv64/dynarec_rv64_67.c b/src/dynarec/rv64/dynarec_rv64_67.c
index 7c3b3041..bedf6576 100644
--- a/src/dynarec/rv64/dynarec_rv64_67.c
+++ b/src/dynarec/rv64/dynarec_rv64_67.c
@@ -78,7 +78,7 @@ uintptr_t dynarec64_67(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             FAST_8BIT_OPERATION(gb, wb, x1, ADD(gb, gb, x1));
             GETEB32(x2, 0);
             GETGB(x1);
-            emit_add8(dyn, ninst, x1, x2, x3, x4);
+            emit_add8(dyn, ninst, x1, x2, x3, x4, x6);
             GBBACK(x4);
             break;
         case 0x03:
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_logic.c b/src/dynarec/rv64/dynarec_rv64_emit_logic.c
index 99ca4684..04185f05 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_logic.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_logic.c
@@ -34,15 +34,13 @@ void emit_xor8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     ANDI(s1, s1, 0xff);
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_PEND) {
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+    IFX (X_ZF) {
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -62,15 +60,13 @@ void emit_xor8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s
     ANDI(s1, s1, 0xff);
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_PEND) {
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -92,8 +88,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     // test sign bit before zeroup.
     IFX(X_SF) {
         if (!rex.w) SEXT_W(s1, s1);
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w && s1!=s2) {
         ZEROUP(s1);
@@ -104,8 +99,7 @@ void emit_xor32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -132,8 +126,7 @@ void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
     // test sign bit before zeroup.
     IFX(X_SF) {
         if (!rex.w) SEXT_W(s1, s1);
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -144,8 +137,7 @@ void emit_xor32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
     }
 
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -170,13 +162,11 @@ void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
     IFX(X_ZF | X_SF) {
         IFX(X_ZF) {
-            BNEZ(s1, 8);
-            ORI(xFlags, xFlags, 1 << F_ZF);
+            SET_FLAGS_EQZ(s1, F_ZF, s4);
         }
         IFX(X_SF) {
             SRLI(s3, s1, 15);
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_SF);
+            SET_FLAGS_NEZ(s3, F_SF, s4);
         }
     }
     IFX(X_PF) {
@@ -201,13 +191,11 @@ void emit_or16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4) {
 
     IFX(X_SF) {
         SRLI(s3, s1, 15);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
 
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -229,8 +217,7 @@ void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     // test sign bit before zeroup.
     IFX(X_SF) {
         if (!rex.w) SEXT_W(s1, s1);
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -241,8 +228,7 @@ void emit_or32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     }
 
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -268,8 +254,7 @@ void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     // test sign bit before zeroup.
     IFX(X_SF) {
         if (!rex.w) SEXT_W(s1, s1);
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -280,8 +265,7 @@ void emit_or32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     }
 
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -307,12 +291,10 @@ void emit_and8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     }
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -337,12 +319,10 @@ void emit_and8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s
     }
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -364,12 +344,10 @@ void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     }
     IFX(X_SF) {
         SRLI(s3, s1, 15);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -394,12 +372,10 @@ void emit_and32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
     IFX(X_SF) {
         SRLI(s3, s1, rex.w?63:31);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -429,12 +405,10 @@ void emit_and32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
     }
     IFX(X_SF) {
         SRLI(s3, s1, rex.w?63:31);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -458,12 +432,10 @@ void emit_or8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     }
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c
index a8557853..c666497d 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_math.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c
@@ -35,31 +35,31 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     IFX(X_CF) {
         if (rex.w) {
             AND(s5, xMASK, s1);
-            if(rv64_zba) ADDUW(s5, s2, s5); else {AND(s4, xMASK, s2); ADD(s5, s5, s4);} // lo
+            if (rv64_zba) // lo
+                ADDUW(s5, s2, s5);
+            else {
+                AND(s4, xMASK, s2);
+                ADD(s5, s5, s4);
+            }
             SRLI(s3, s1, 0x20);
             SRLI(s4, s2, 0x20);
             ADD(s4, s4, s3);
             SRLI(s5, s5, 0x20);
             ADD(s5, s5, s4); // hi
             SRAI(s5, s5, 0x20);
-            BEQZ(s5, 8);
-            ORI(xFlags, xFlags, 1 << F_CF);
         } else {
             AND(s3, s1, xMASK);
             AND(s4, s2, xMASK);
             ADD(s5, s3, s4);
             SRLI(s5, s5, 0x20);
-            BEQZ(s5, 8);
-            ORI(xFlags, xFlags, 1 << F_CF);
         }
+        SET_FLAGS_NEZ(s5, F_CF, s4);
     }
     IFX(X_AF | X_OF) {
         OR(s3, s1, s2);      // s3 = op1 | op2
-        AND(s4, s1, s2);      // s4 = op1 & op2
+        AND(s4, s1, s2);     // s4 = op1 & op2
     }
-
     ADDxw(s1, s1, s2);
-
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
@@ -73,21 +73,18 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s4, F_AF, s5);
         }
         IFX(X_OF) {
             SRLI(s3, s3, rex.w?62:30);
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s5);
         }
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s5);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -96,8 +93,7 @@ void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         emit_pf(dyn, ninst, s1, s3, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
 }
 
@@ -131,23 +127,25 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
     IFX(X_CF) {
         if (rex.w) {
             AND(s5, xMASK, s1);
-            if(rv64_zba) ADDUW(s5, s2, s5); else {AND(s4, xMASK, s2); ADD(s5, s5, s4);} // lo
+            if (rv64_zba) // lo
+                ADDUW(s5, s2, s5);
+            else {
+                AND(s4, xMASK, s2);
+                ADD(s5, s5, s4);
+            }
             SRLI(s3, s1, 0x20);
             SRLI(s4, s2, 0x20);
             ADD(s4, s4, s3);
             SRLI(s5, s5, 0x20);
             ADD(s5, s5, s4); // hi
             SRAI(s5, s5, 0x20);
-            BEQZ(s5, 8);
-            ORI(xFlags, xFlags, 1 << F_CF);
         } else {
             AND(s3, s1, xMASK);
             AND(s4, s2, xMASK);
             ADD(s5, s3, s4);
             SRLI(s5, s5, 0x20);
-            BEQZ(s5, 8);
-            ORI(xFlags, xFlags, 1 << F_CF);
         }
+        SET_FLAGS_NEZ(s5, F_CF, s4);
     }
     IFX(X_AF | X_OF) {
         OR(s3, s1, s2);      // s3 = op1 | op2
@@ -174,21 +172,18 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s4, F_AF, s5);
         }
         IFX(X_OF) {
             SRLI(s3, s3, rex.w?62:30);
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s5);
         }
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s5);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -197,8 +192,7 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         emit_pf(dyn, ninst, s1, s3, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
 }
 
@@ -232,35 +226,30 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s4, F_AF, s5);
         }
         IFX(X_OF) {
             SRLI(s3, s3, 14);
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s5);
         }
     }
 
     IFX(X_CF) {
         SRLI(s3, s1, 16);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        SET_FLAGS_NEZ(s3, F_CF, s4);
     }
 
     ZEXTH(s1, s1);
 
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_SF) {
         SRLI(s3, s1, 15);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -268,7 +257,7 @@ void emit_add16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 }
 
 // emit ADD8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
-void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
     CLEAR_FLAGS();
     IFX(X_PEND) {
@@ -294,35 +283,30 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s4, F_AF, s5);
         }
         IFX(X_OF) {
             SRLI(s3, s3, 6);
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s5);
         }
     }
     IFX(X_CF) {
         SRLI(s3, s1, 8);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        SET_FLAGS_NEZ(s3, F_CF, s4);
     }
     IFX(X_PEND) {
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
     ANDI(s1, s1, 0xff);
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -330,7 +314,7 @@ void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 }
 
 // emit ADD8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch
-void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4)
+void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, int s4, int s5)
 {
     CLEAR_FLAGS();
     IFX(X_PEND) {
@@ -358,35 +342,30 @@ void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int c, int s2, int s3, i
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s4, F_AF, s5);
         }
         IFX(X_OF) {
             SRLI(s3, s3, 6);
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s5);
         }
     }
     IFX(X_CF) {
         SRLI(s3, s1, 8);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        SET_FLAGS_NEZ(s3, F_CF, s4);
     }
     IFX(X_PEND) {
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
     ANDI(s1, s1, 0xff);
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -414,16 +393,14 @@ void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
     ANDI(s1, s1, 0xff);
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_PEND) {
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
     CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 8);
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -460,15 +437,13 @@ void emit_sub16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
     SLLI(s1, s1, 48);
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     SRLI(s1, s1, 48);
 
     CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 16);
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -498,16 +473,14 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w) {
         ZEROUP(s1);
     }
     CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -564,16 +537,14 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w) {
         ZEROUP(s1);
     }
     CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -612,30 +583,26 @@ void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s2, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s2, F_AF, s4);
         }
         IFX(X_OF) {
             SRLI(s3, s3, 6);
             SRLI(s2, s3, 1);
             XOR(s3, s3, s2);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s4);
         }
     }
     IFX(X_SF) {
         ANDI(s2, s1, 0x80);
-        BEQZ(s2, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s2, F_SF, s4);
     }
     ANDI(s1, s1, 0xff);
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s2);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
 }
 
@@ -668,30 +635,26 @@ void emit_dec8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         OR(s3, s3, s4);   // cc = (res & (~op1 | op2)) | (~op1 & op2)
         IFX(X_AF) {
             ANDI(s2, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s2, F_AF, s4);
         }
         IFX(X_OF) {
             SRLI(s3, s3, 6);
             SRLI(s2, s3, 1);
             XOR(s3, s3, s2);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s4);
         }
     }
     IFX(X_SF) {
         ANDI(s2, s1, 0x80);
-        BEQZ(s2, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s2, F_SF, s4);
     }
     ANDI(s1, s1, 0xff);
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s2);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
 }
 
@@ -727,21 +690,18 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         OR(s3, s3, s5);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s2, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s2, F_AF, s4);
         }
         IFX(X_OF) {
             SRLI(s3, s3, rex.w?62:30);
             SRLI(s2, s3, 1);
             XOR(s3, s3, s2);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s4);
         }
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s4, s5);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -750,8 +710,7 @@ void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         emit_pf(dyn, ninst, s1, s3, s2);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
 }
 
@@ -783,21 +742,18 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         OR(s3, s3, s5);   // cc = (res & (~op1 | op2)) | (~op1 & op2)
         IFX(X_AF) {
             ANDI(s2, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s2, F_AF, s4);
         }
         IFX(X_OF) {
             SRLI(s3, s3, rex.w?62:30);
             SRLI(s2, s3, 1);
             XOR(s3, s3, s2);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s4);
         }
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s4, s5);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -806,13 +762,12 @@ void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         emit_pf(dyn, ninst, s1, s3, s2);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
 }
 
 // emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch
-void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
 {
     IFX(X_ALL) {
         ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_OF2) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF)));
@@ -843,29 +798,25 @@ void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         OR(s3, s3, s4);   // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s4, F_AF, s5);
         }
         IFX(X_OF) {
             SRLI(s3, s3, 14);
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s5);
         }
     }
 
     ZEXTH(s1, s1);
 
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_SF) {
         SRLI(s3, s1, 15);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -900,30 +851,26 @@ void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
         OR(s3, s3, s5);   // cc = (res & (~op1 | op2)) | (~op1 & op2)
         IFX(X_AF) {
             ANDI(s2, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s2, F_AF, s5);
         }
         IFX(X_OF) {
             SRLI(s3, s3, 14);
             SRLI(s2, s3, 1);
             XOR(s3, s3, s2);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s5);
         }
     }
     SLLIW(s1, s1, 16);
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s4, s5);
     }
     SRLIW(s1, s1, 16);
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s2);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
 }
 
@@ -956,12 +903,10 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
     CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 8);
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s5);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -1000,34 +945,29 @@ void emit_adc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
         OR(s3, s3, s5);  // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s4, F_AF, s5);
         }
         IFX(X_OF) {
             SRLI(s3, s3, 6);
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s5);
         }
     }
     IFX(X_CF) {
         SRLI(s3, s1, 8);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        SET_FLAGS_NEZ(s3, F_CF, s5);
     }
 
     ANDI(s1, s1, 0xff);
 
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -1070,8 +1010,7 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     CLEAR_FLAGS();
     SLLIW(s1, s1, 16);
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s4, s5);
     }
     SRLIW(s1, s1, 16);
 
@@ -1081,8 +1020,7 @@ void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 
     CALC_SUB_FLAGS(s5, s2, s1, s3, s4, 16);
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -1111,8 +1049,7 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 
     CLEAR_FLAGS();
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s4, s5);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -1124,8 +1061,7 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 
     CALC_SUB_FLAGS(s5, s2, s1, s3, s4, rex.w?64:32);
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -1133,7 +1069,7 @@ void emit_sbb32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 }
 
 // emit NEG32 instruction, from s1, store result in s1 using s2 and s3 as scratch
-void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3)
+void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
     CLEAR_FLAGS();
     IFX(X_PEND) {
@@ -1152,8 +1088,7 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 
     IFX(X_CF) {
-        BEQZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        SET_FLAGS_NEZ(s1, F_CF, s4);
     }
 
     IFX(X_AF | X_OF) {
@@ -1161,8 +1096,7 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         IFX(X_AF) {
             /* af = bc & 0x8 */
             ANDI(s2, s3, 8);
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s2, F_AF, s4);
         }
         IFX(X_OF) {
             /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
@@ -1170,13 +1104,11 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
             SRLI(s3, s2, 1);
             XOR(s2, s2, s3);
             ANDI(s2, s2, 1);
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s2, F_OF2, s4);
         }
     }
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s4, s5);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -1185,13 +1117,12 @@ void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         emit_pf(dyn, ninst, s1, s3, s2);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
 }
 
 // emit NEG16 instruction, from s1, store result in s1 using s2 and s3 as scratch
-void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
+void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 {
     CLEAR_FLAGS();
     IFX(X_PEND) {
@@ -1211,8 +1142,7 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
     }
 
     IFX(X_CF) {
-        BEQZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        SET_FLAGS_NEZ(s1, F_CF, s4);
     }
 
     IFX(X_AF | X_OF) {
@@ -1220,8 +1150,7 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
         IFX(X_AF) {
             /* af = bc & 0x8 */
             ANDI(s2, s3, 8);
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s2, F_AF, s4);
         }
         IFX(X_OF) {
             /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
@@ -1229,8 +1158,7 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
             SRLI(s3, s2, 1);
             XOR(s2, s2, s3);
             ANDI(s2, s2, 1);
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s2, F_OF2, s4);
         }
     }
     IFX(X_SF) {
@@ -1242,13 +1170,12 @@ void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
         emit_pf(dyn, ninst, s1, s3, s2);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
 }
 
 // emit NEG8 instruction, from s1, store result in s1 using s2 and s3 as scratch
-void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
+void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 {
     CLEAR_FLAGS();
     IFX(X_PEND) {
@@ -1268,8 +1195,7 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
     }
 
     IFX(X_CF) {
-        BEQZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        SET_FLAGS_NEZ(s1, F_CF, s4);
     }
 
     IFX(X_AF | X_OF) {
@@ -1277,8 +1203,7 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
         IFX(X_AF) {
             /* af = bc & 0x8 */
             ANDI(s2, s3, 8);
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s2, F_AF, s4);
         }
         IFX(X_OF) {
             /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
@@ -1286,8 +1211,7 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
             SRLI(s3, s2, 1);
             XOR(s2, s2, s3);
             ANDI(s2, s2, 1);
-            BEQZ(s2, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s2, F_OF2, s4);
         }
     }
     IFX(X_SF) {
@@ -1298,8 +1222,7 @@ void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3)
         emit_pf(dyn, ninst, s1, s3, s2);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
 }
 
@@ -1336,34 +1259,29 @@ void emit_adc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
         OR(s3, s3, s5);  // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s4, F_AF, s5);
         }
         IFX(X_OF) {
             SRLI(s3, s3, 14);
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s5);
         }
     }
     IFX(X_CF) {
         SRLI(s3, s1, 16);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
+        SET_FLAGS_NEZ(s3, F_CF, s5);
     }
 
     ZEXTH(s1, s1);
 
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_SF) {
         SRLI(s3, s1, 15);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -1414,10 +1332,6 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_CF) {
-        BEQZ(s6, 8);
-        ORI(xFlags, xFlags, 1 << F_CF);
-    }
     IFX(X_AF | X_OF) {
         if(rv64_zbb) {
             ANDN(s3, s4, s1);   // s3 = ~res & (op1 | op2)
@@ -1428,21 +1342,21 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         OR(s3, s3, s5);  // cc = (~res & (op1 | op2)) | (op1 & op2)
         IFX(X_AF) {
             ANDI(s4, s3, 0x08); // AF: cc & 0x08
-            BEQZ(s4, 8);
-            ORI(xFlags, xFlags, 1 << F_AF);
+            SET_FLAGS_NEZ(s4, F_AF, s5);
         }
         IFX(X_OF) {
             SRLI(s3, s3, rex.w?62:30);
             SRLI(s4, s3, 1);
             XOR(s3, s3, s4);
             ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
-            BEQZ(s3, 8);
-            ORI(xFlags, xFlags, 1 << F_OF2);
+            SET_FLAGS_NEZ(s3, F_OF2, s5);
         }
     }
+    IFX (X_CF) {
+        SET_FLAGS_NEZ(s6, F_CF, s5);
+    }
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s5, s6);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -1451,7 +1365,6 @@ void emit_adc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         emit_pf(dyn, ninst, s1, s3, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_shift.c b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
index 5bcbcd2b..6e61b769 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_shift.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_shift.c
@@ -48,8 +48,7 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
 
         SLLI(s1, s1, c+56);
         IFX(X_SF) {
-            BGE(s1, xZR, 8);
-            ORI(xFlags, xFlags, 1 << F_SF);
+            SET_FLAGS_LTZ(s1, F_SF, s3, s4);
         }
         SRLI(s1, s1, 56);
 
@@ -57,8 +56,7 @@ void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
             SB(s1, xEmu, offsetof(x64emu_t, res));
         }
         IFX(X_ZF) {
-            BNEZ(s1, 8);
-            ORI(xFlags, xFlags, 1 << F_ZF);
+            SET_FLAGS_EQZ(s1, F_ZF, s3);
         }
         IFX(X_OF) {
             // OF flag is affected only on 1-bit shifts
@@ -139,8 +137,7 @@ void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -175,8 +172,7 @@ void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
     // OF nop
     IFX(X_SF) {
         // SF is the same as the original operand
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
 
     SRLI(s1, s1, c);
@@ -186,8 +182,7 @@ void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -219,8 +214,7 @@ void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
 
     SLLI(s1, s1, 56);
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     SRLI(s1, s1, 56);
 
@@ -228,8 +222,7 @@ void emit_shl8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_OF) {
         // OF flag is affected only on 1-bit shifts
@@ -284,8 +277,7 @@ void emit_shr8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -316,8 +308,7 @@ void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
     // OF nop
     IFX(X_SF) {
         // SF is the same as the original operand
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
 
     SRL(s1, s1, s2);
@@ -327,8 +318,7 @@ void emit_sar8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
         SB(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -362,8 +352,7 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
 
         SLLI(s1, s1, c+48);
         IFX(X_SF) {
-            BGE(s1, xZR, 8);
-            ORI(xFlags, xFlags, 1 << F_SF);
+            SET_FLAGS_LTZ(s1, F_SF, s3, s4);
         }
         SRLI(s1, s1, 48);
 
@@ -371,8 +360,7 @@ void emit_shl16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
             SH(s1, xEmu, offsetof(x64emu_t, res));
         }
         IFX(X_ZF) {
-            BNEZ(s1, 8);
-            ORI(xFlags, xFlags, 1 << F_ZF);
+            SET_FLAGS_EQZ(s1, F_ZF, s3);
         }
         IFX(X_OF) {
             // OF flag is affected only on 1-bit shifts
@@ -452,8 +440,7 @@ void emit_shr16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -488,8 +475,7 @@ void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
     // OF nop
     IFX(X_SF) {
         // SF is the same as the original operand
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
 
     SRLI(s1, s1, c);
@@ -499,8 +485,7 @@ void emit_sar16c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -533,8 +518,7 @@ void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 
     SLLI(s1, s1, 48);
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     SRLI(s1, s1, 48);
 
@@ -542,8 +526,7 @@ void emit_shl16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_OF) {
         // OF flag is affected only on 1-bit shifts
@@ -598,8 +581,7 @@ void emit_shr16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -630,8 +612,7 @@ void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     // OF nop
     IFX(X_SF) {
         // SF is the same as the original operand
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
 
     SRL(s1, s1, s2);
@@ -641,8 +622,7 @@ void emit_sar16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -679,8 +659,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -689,8 +668,7 @@ void emit_shl32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_OF) {
         // OF flag is affected only on 1-bit shifts
@@ -738,8 +716,7 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     }
 
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -748,8 +725,7 @@ void emit_shl32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_OF) {
         // OF flag is affected only on 1-bit shifts
@@ -799,8 +775,7 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     SRL(s1, s1, s2);
 
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -809,8 +784,7 @@ void emit_shr32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -864,8 +838,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     }
 
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w && c == 0) {
         ZEROUP(s1);
@@ -874,8 +847,7 @@ void emit_shr32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -923,8 +895,7 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
 
     // SRAIW sign-extends, so test sign bit before clearing upper bits
     IFX(X_SF) {
-        BGE(s1, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s1, F_SF, s3, s4);
     }
     if (!rex.w) {
         ZEROUP(s1);
@@ -933,8 +904,7 @@ void emit_sar32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -1148,7 +1118,7 @@ void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
 }
 
 // emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch
-void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
+void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5)
 {
     c&=(rex.w?0x3f:0x1f);
     CLEAR_FLAGS();
@@ -1193,15 +1163,13 @@ void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     }
     IFX(X_SF) {
         SRLIxw(s3, s1, rex.w?63:31);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s5);
     }
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_OF) {
         // the OF flag is set if a sign change occurred
@@ -1266,18 +1234,11 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         OR(s1, s1, s5);
     }
     ZEXTH(s1, s1);
-
-    IFX(X_SF) {
-        SLLIW(s3, s1, 16);
-        BGE(s3, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
-    }
     IFX(X_PEND) {
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_OF) {
         // the OF flag is set if a sign change occurred
@@ -1289,12 +1250,16 @@ void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
             OR(xFlags, xFlags, s3);
         }
     }
+    IFX (X_SF) {
+        SLLIW(s3, s1, 16);
+        SET_FLAGS_LTZ(s3, F_SF, s4, s5);
+    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
 }
 
-void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
+void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5)
 {
     c&=(rex.w?0x3f:0x1f);
     CLEAR_FLAGS();
@@ -1337,17 +1302,11 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
     if (!rex.w) {
         ZEROUP(s1);
     }
-    IFX(X_SF) {
-        SRLIxw(s3, s1, rex.w?63:31);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
-    }
     IFX(X_PEND) {
         SDxw(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
     }
     IFX(X_OF) {
         // the OF flag is set if a sign change occurred
@@ -1358,6 +1317,10 @@ void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
             ORI(xFlags, xFlags, s3);
         }
     }
+    IFX (X_SF) {
+        SRLIxw(s3, s1, rex.w ? 63 : 31);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
+    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -1397,15 +1360,6 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
     if (!rex.w) {
         ZEROUP(s1);
     }
-    IFX(X_SF) {
-        SRLIxw(s3, s1, rex.w?63:31);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
-    }
-    IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
-    }
     IFX(X_OF) {
         ADDI(s5, s5, -1);
         BNEZ_MARK(s5);
@@ -1415,6 +1369,13 @@ void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
         OR(xFlags, xFlags, s3);
         MARK;
     }
+    IFX (X_ZF) {
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
+    }
+    IFX (X_SF) {
+        SRLIxw(s3, s1, rex.w ? 63 : 31);
+        SET_FLAGS_NEZ(s3, F_SF, s5);
+    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -1453,15 +1414,6 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
     if (!rex.w) {
         ZEROUP(s1);
     }
-    IFX(X_SF) {
-        SRLIxw(s3, s1, rex.w?63:31);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
-    }
-    IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
-    }
     IFX(X_OF) {
         ADDI(s5, s5, -1);
         BNEZ_MARK(s5);
@@ -1471,6 +1423,13 @@ void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
         OR(xFlags, xFlags, s3);
         MARK;
     }
+    IFX (X_ZF) {
+        SET_FLAGS_EQZ(s1, F_ZF, s5);
+    }
+    IFX (X_SF) {
+        SRLIxw(s3, s1, rex.w ? 63 : 31);
+        SET_FLAGS_NEZ(s3, F_SF, s5);
+    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -1524,19 +1483,9 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
         OR(s1, s1, s3);
     }
     ZEXTH(s1, s1);
-
-    IFX(X_SF) {
-        SLLIW(s4, s1, 16);
-        BGE(s4, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
-    }
     IFX(X_PEND) {
         SH(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
-    }
     IFX(X_OF) {
         // the OF flag is set if a sign change occurred
         if(c==1) {
@@ -1547,6 +1496,13 @@ void emit_shld16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uin
             OR(xFlags, xFlags, s3);
         }
     }
+    IFX (X_SF) {
+        SLLIW(s4, s1, 16);
+        SET_FLAGS_LTZ(s4, F_SF, s3, s5);
+    }
+    IFX (X_ZF) {
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
+    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_tests.c b/src/dynarec/rv64/dynarec_rv64_emit_tests.c
index 00c1fb7d..439951a3 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_tests.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_tests.c
@@ -46,13 +46,11 @@ void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
     }
     IFX(X_SF) {
         SRLI(s3, s6, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 8);
     IFX(X_ZF) {
-        BNEZ(s6, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s6, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s6, s3, s4);
@@ -74,12 +72,10 @@ void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4)
 
     IFX(X_SF) {
         SRLI(s3, s1, 7);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -113,13 +109,11 @@ void emit_cmp16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
     IFX(X_SF) {
         SRLI(s3, s6, 15);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     CALC_SUB_FLAGS(s5, s2, s6, s3, s4, 16);
     IFX(X_ZF) {
-        BNEZ(s6, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s6, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s6, s3, s4);
@@ -141,12 +135,10 @@ void emit_cmp16_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4)
 
     IFX(X_SF) {
         SRLI(s3, s1, 15);
-        BEQZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s3, F_SF, s4);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -176,16 +168,14 @@ void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SDxw(s6, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_SF) {
-        BGE(s6, xZR, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_LTZ(s6, F_SF, s3, s4);
     }
     if (!rex.w) {
         ZEROUP(s6);
     }
     CALC_SUB_FLAGS(s5, s2, s6, s3, s4, rex.w?64:32);
     IFX(X_ZF) {
-        BNEZ(s6, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s6, F_ZF, s4);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s6, s3, s4);
@@ -206,16 +196,14 @@ void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int
     }
     IFX(X_SF) {
         if (rex.w) {
-            BGE(s1, xZR, 8);
+            SET_FLAGS_LTZ(s1, F_SF, s3, s4);
         } else {
             SRLI(s3, s1, 31);
-            BEQZ(s3, 8);
+            SET_FLAGS_NEZ(s3, F_SF, s4);
         }
-        ORI(xFlags, xFlags, 1 << F_SF);
     }
     IFX(X_ZF) {
-        BNEZ(s1, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s1, F_ZF, s3);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
@@ -238,12 +226,10 @@ void emit_test8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
     IFX(X_SF) {
         SRLI(s4, s3, 7);
-        BEQZ(s4, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s4, F_SF, s5);
     }
     IFX(X_ZF) {
-        BNEZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s3, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s3, s4, s5);
@@ -267,12 +253,10 @@ void emit_test16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
     }
     IFX(X_SF) {
         SRLI(s4, s3, 15);
-        BEQZ(s4, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SET_FLAGS_NEZ(s4, F_SF, s5);
     }
     IFX(X_ZF) {
-        BNEZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s3, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s3, s4, s5);
@@ -298,13 +282,11 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
         if (!rex.w) ZEROUP(s3);
     }
     IFX(X_SF) {
-        SRLI(s4, s3, rex.w?63:31);
-        BEQZ(s4, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SRLI(s4, s3, rex.w ? 63 : 31);
+        SET_FLAGS_NEZ(s4, F_SF, s5);
     }
     IFX(X_ZF) {
-        BNEZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s3, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s3, s4, s5);
@@ -335,13 +317,11 @@ void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c,
         SDxw(s3, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_SF) {
-        SRLI(s4, s3, rex.w?63:31);
-        BEQZ(s4, 8);
-        ORI(xFlags, xFlags, 1 << F_SF);
+        SRLI(s4, s3, rex.w ? 63 : 31);
+        SET_FLAGS_NEZ(s4, F_SF, s5);
     }
     IFX(X_ZF) {
-        BNEZ(s3, 8);
-        ORI(xFlags, xFlags, 1 << F_ZF);
+        SET_FLAGS_EQZ(s3, F_ZF, s5);
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s3, s4, s5);
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index 9b30ab79..2ae12cf3 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -898,43 +898,71 @@
 #define CLEAR_FLAGS() \
     IFX(X_ALL) { ANDI(xFlags, xFlags, ~((1UL << F_AF) | (1UL << F_CF) | (1UL << F_OF2) | (1UL << F_ZF) | (1UL << F_SF) | (1UL << F_PF))); }
 
+#define SET_FLAGS_NEZ(reg, F, scratch)                                         \
+    do { /* OR bit F into xFlags when reg != 0 */                              \
+        if (rv64_xtheadcondmov) { /* branchless; clobbers scratch */           \
+            ORI(scratch, xFlags, 1 << (F)); /* candidate value with F set */   \
+            TH_MVNEZ(xFlags, scratch, reg); /* xFlags = scratch if reg != 0 */ \
+        } else { /* fallback: conditional branch */                            \
+            BEQZ(reg, 8); /* reg == 0: skip the ORI below */                   \
+            ORI(xFlags, xFlags, 1 << (F));                                     \
+        }                                                                      \
+    } while (0)
+
+#define SET_FLAGS_EQZ(reg, F, scratch)                                         \
+    do { /* OR bit F into xFlags when reg == 0 */                              \
+        if (rv64_xtheadcondmov) { /* branchless; clobbers scratch */           \
+            ORI(scratch, xFlags, 1 << (F)); /* candidate value with F set */   \
+            TH_MVEQZ(xFlags, scratch, reg); /* xFlags = scratch if reg == 0 */ \
+        } else { /* fallback: conditional branch */                            \
+            BNEZ(reg, 8); /* reg != 0: skip the ORI below */                   \
+            ORI(xFlags, xFlags, 1 << (F));                                     \
+        }                                                                      \
+    } while (0)
+
+#define SET_FLAGS_LTZ(reg, F, scratch1, scratch2)                               \
+    do { /* OR bit F into xFlags when reg < 0 (signed) */                       \
+        if (rv64_xtheadcondmov) { /* branchless; clobbers both scratches */     \
+            SLT(scratch1, reg, xZR); /* scratch1 = (reg < 0) */                 \
+            ORI(scratch2, xFlags, 1 << (F)); /* candidate value with F set */   \
+            TH_MVNEZ(xFlags, scratch2, scratch1); /* commit if scratch1 != 0 */ \
+        } else { /* fallback: conditional branch */                             \
+            BGE(reg, xZR, 8); /* reg >= 0: skip the ORI below */                \
+            ORI(xFlags, xFlags, 1 << (F));                                      \
+        }                                                                       \
+    } while (0)
+
+// ORs AF/CF/OF2 into xFlags from the borrow chain; clobbers op1_ (overwritten, then reused as the SET_FLAGS_NEZ scratch), scratch1 and scratch2
 #define CALC_SUB_FLAGS(op1_, op2, res, scratch1, scratch2, width)     \
-    IFX(X_AF | X_CF | X_OF)                                           \
-    {                                                                 \
+    IFX (X_AF | X_CF | X_OF) {                                        \
         /* calc borrow chain */                                       \
         /* bc = (res & (~op1 | op2)) | (~op1 & op2) */                \
         OR(scratch1, op1_, op2);                                      \
         AND(scratch2, res, scratch1);                                 \
         AND(op1_, op1_, op2);                                         \
         OR(scratch2, scratch2, op1_);                                 \
-        IFX(X_AF)                                                     \
-        {                                                             \
+        IFX (X_AF) {                                                  \
             /* af = bc & 0x8 */                                       \
             ANDI(scratch1, scratch2, 8);                              \
-            BEQZ(scratch1, 8);                                        \
-            ORI(xFlags, xFlags, 1 << F_AF);                           \
+            SET_FLAGS_NEZ(scratch1, F_AF, op1_);                      \
         }                                                             \
-        IFX(X_CF)                                                     \
-        {                                                             \
+        IFX (X_CF) {                                                  \
             /* cf = bc & (1<<(width-1)) */                            \
             if ((width) == 8) {                                       \
                 ANDI(scratch1, scratch2, 0x80);                       \
             } else {                                                  \
-                SRLI(scratch1, scratch2, (width)-1);                  \
+                SRLI(scratch1, scratch2, (width) - 1);                \
                 if ((width) != 64) ANDI(scratch1, scratch1, 1);       \
             }                                                         \
-            BEQZ(scratch1, 8);                                        \
-            ORI(xFlags, xFlags, 1 << F_CF);                           \
+            SET_FLAGS_NEZ(scratch1, F_CF, op1_);                      \
         }                                                             \
-        IFX(X_OF)                                                     \
-        {                                                             \
+        IFX (X_OF) {                                                  \
             /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */ \
-            SRLI(scratch1, scratch2, (width)-2);                      \
+            SRLI(scratch1, scratch2, (width) - 2);                    \
             SRLI(scratch2, scratch1, 1);                              \
             XOR(scratch1, scratch1, scratch2);                        \
             ANDI(scratch1, scratch1, 1);                              \
-            BEQZ(scratch1, 8);                                        \
-            ORI(xFlags, xFlags, 1 << F_OF2);                          \
+            SET_FLAGS_NEZ(scratch1, F_OF2, op1_);                     \
         }                                                             \
     }
 
@@ -1367,8 +1395,8 @@ void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int
 void emit_test32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5);
 void emit_add32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
-void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
-void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4);
+void emit_add8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_add8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5);
 void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
 void emit_sub8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
@@ -1396,7 +1424,7 @@ void emit_xor16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4,
 void emit_and16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 // void emit_and16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 void emit_inc32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
-void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+void emit_inc16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_inc8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_dec32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_dec16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
@@ -1413,9 +1441,9 @@ void emit_sbb8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
 void emit_sbb8c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5, int s6);
 void emit_sbb16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 // void emit_sbb16c(dynarec_rv64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
-void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
-void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
-void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
+void emit_neg32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
+void emit_neg16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+void emit_neg8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_shl8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
 void emit_shr8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
 void emit_sar8c(dynarec_rv64_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4, int s5);
@@ -1437,8 +1465,8 @@ void emit_rol32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_ror32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_rol32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_ror32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
-void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
-void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
+void emit_shrd32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
+void emit_shld32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);
 void emit_shrd32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6);
 void emit_shld32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s5, int s3, int s4, int s6);
 void emit_shrd16c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4, int s5);