about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2024-10-29 13:09:51 +0100
committerptitSeb <sebastien.chev@gmail.com>2024-10-29 13:09:51 +0100
commitd99d18759bf7d1e2e83b5c263f7f915ec8e7b7d0 (patch)
tree2c47bdb449c93f605849d2e78e4e09afd6ab5a13 /src
parent68c3be3e16e529521e35c7852820db5ee251281b (diff)
downloadbox64-d99d18759bf7d1e2e83b5c263f7f915ec8e7b7d0.tar.gz
box64-d99d18759bf7d1e2e83b5c263f7f915ec8e7b7d0.zip
[ARM64_DYNAREC] Also use Native Carry flags directly when possible
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/arm64_emitter.h4
-rw-r--r--src/dynarec/arm64/dynarec_arm64_660f.c4
-rw-r--r--src/dynarec/arm64/dynarec_arm64_67.c4
-rw-r--r--src/dynarec/arm64/dynarec_arm64_emit_logic.c126
-rw-r--r--src/dynarec/arm64/dynarec_arm64_emit_math.c156
-rw-r--r--src/dynarec/arm64/dynarec_arm64_emit_tests.c48
-rw-r--r--src/dynarec/arm64/dynarec_arm64_f30f.c4
-rw-r--r--src/dynarec/arm64/dynarec_arm64_functions.c242
-rw-r--r--src/dynarec/arm64/dynarec_arm64_functions.h2
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.c50
-rw-r--r--src/dynarec/arm64/dynarec_arm64_helper.h37
-rw-r--r--src/dynarec/arm64/dynarec_arm64_pass0.h10
-rw-r--r--src/dynarec/arm64/dynarec_arm64_private.h13
13 files changed, 443 insertions, 257 deletions
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index f5f0cc96..ae337ac6 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -820,9 +820,9 @@ int convert_bitmask(uint64_t bitmask);
 #define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt)  (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt))
 // mrs    x0, nzcv : 1101010100 1 1 1 011 0100 0010 000 00000    o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0
 // MRS : from System register
-#define MRS_nzvc(Rt)                    EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt))
+#define MRS_nzcv(Rt)                    EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt))
 // MSR : to System register
-#define MSR_nzvc(Rt)                   FEMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt))
+#define MSR_nzcv(Rt)                   FEMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt))
 // mrs    x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000    o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=0
 #define MRS_fpcr(Rt)                    EMIT(MRS_gen(1, 1, 3, 4, 4, 0, Rt))
 #define MSR_fpcr(Rt)                    EMIT(MRS_gen(0, 1, 3, 4, 4, 0, Rt))
diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 2ddb2212..41377f4d 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -923,9 +923,9 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     SETFLAGS(X_CF, SF_SUBSET);

                     GETED(0);

                     GETGD;

-                    MRS_nzvc(x3);

+                    MRS_nzcv(x3);

                     BFIx(x3, xFlags, 29, 1); // set C

-                    MSR_nzvc(x3);      // load CC into ARM CF

+                    MSR_nzcv(x3);      // load CC into ARM CF

                     IFX(X_CF) {

                         ADCSxw_REG(gd, gd, ed);

                         CSETw(x3, cCS);

diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index 94202ba1..358825c1 100644
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -234,10 +234,10 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                     SETFLAGS(X_OF, SF_SUBSET);

                                     GETED32(0);

                                     GETGD;

-                                    MRS_nzvc(x3);

+                                    MRS_nzcv(x3);

                                     LSRw(x4, xFlags, F_OF);

                                     BFIx(x3, x4, 29, 1); // set C

-                                    MSR_nzvc(x3);      // load CC into ARM CF

+                                    MSR_nzcv(x3);      // load CC into ARM CF

                                     IFX(X_OF) {

                                         ADCSxw_REG(gd, gd, ed);

                                         CSETw(x3, cCS);

diff --git a/src/dynarec/arm64/dynarec_arm64_emit_logic.c b/src/dynarec/arm64/dynarec_arm64_emit_logic.c
index 5b9a08f2..8e6b9bff 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_logic.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_logic.c
@@ -38,18 +38,21 @@ void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3,
     IFX(X_ZF) need_tst = 1;
     IFXNATIVE(X_SF, NF_SF) need_tst = 1;
     IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    IFXNATIVE(X_CF, NF_CF) need_tst = 1;
     if(need_tst) TSTxw_REG(s1, s1);
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
+        }
+    }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
         }
     }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
@@ -89,18 +92,21 @@ void emit_or32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int
     IFX(X_ZF) need_tst = 1;
     IFXNATIVE(X_SF, NF_SF) need_tst = 1;
     IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    IFXNATIVE(X_CF, NF_CF) need_tst = 1;
     if(need_tst) TSTxw_REG(s1, s1);
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
         }
     }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
+        }
+    }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
@@ -135,18 +141,21 @@ void emit_xor32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_ZF) need_tst = 1;
     IFXNATIVE(X_SF, NF_SF) need_tst = 1;
     IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    IFXNATIVE(X_CF, NF_CF) need_tst = 1;
     if(need_tst) TSTxw_REG(s1, s1);
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
+        }
+    }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
         }
     }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
@@ -186,18 +195,21 @@ void emit_xor32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     IFX(X_ZF) need_tst = 1;
     IFXNATIVE(X_SF, NF_SF) need_tst = 1;
     IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    IFXNATIVE(X_CF, NF_CF) need_tst = 1;
     if(need_tst) TSTxw_REG(s1, s1);
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
+        }
+    }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
         }
     }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
@@ -224,7 +236,7 @@ void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     } else IFX(X_ALL) {
         SET_DFNONE(s4);
     }
-    IFX(X_ZF|X_SF) {
+    IFX(X_ZF|X_SF|X_CF|X_OF) {
         ANDSxw_REG(s1, s1, s2);
     } else {
         ANDxw_REG(s1, s1, s2);
@@ -232,17 +244,19 @@ void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
         }
     }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
+        }
+    }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
@@ -286,17 +300,19 @@ void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_CF | X_AF | X_OF) {
-        IFNATIVE(NF_VF) {
-            IFX(X_CF | X_AF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
+        }
+    }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
         }
     }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s3, cEQ);
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_math.c b/src/dynarec/arm64/dynarec_arm64_emit_math.c
index ac927c72..4478b1ab 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_math.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_math.c
@@ -57,8 +57,10 @@ void emit_add32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         }
     }
     IFX(X_CF) {
-        CSETw(s4, cCS);
-        BFIw(xFlags, s4, F_CF, 1);
+        IFNATIVE(NF_CF) {} else {
+            CSETw(s4, cCS);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -135,8 +137,10 @@ void emit_add32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
         }
     }
     IFX(X_CF) {
-        CSETw(s4, cCS);
-        BFIw(xFlags, s4, F_CF, 1);
+        IFNATIVE(NF_CF) {} else {
+            CSETw(s4, cCS);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -192,8 +196,12 @@ void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     }
     IFX(X_CF) {
         // inverted carry
-        CSETw(s4, cCC);
-        BFIw(xFlags, s4, F_CF, 1);
+        IFNATIVE(NF_CF) {
+            GEN_INVERTED_CARRY();
+        } else {
+            CSETw(s4, cCC);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -271,8 +279,12 @@ void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     }
     IFX(X_CF) {
         // inverted carry
-        CSETw(s4, cCC);
-        BFIw(xFlags, s4, F_CF, 1);
+        IFNATIVE(NF_CF) {
+            GEN_INVERTED_CARRY();
+        } else {
+            CSETw(s4, cCC);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -980,9 +992,21 @@ void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_AF) {
         MOVxw_REG(s4, s1);
     }
-    MRS_nzvc(s3);
-    BFIx(s3, xFlags, 29, 1); // set C
-    MSR_nzvc(s3);      // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);  //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        MRS_nzcv(s3);
+        BFIx(s3, xFlags, 29, 1); // set C
+        MSR_nzcv(s3);      // load CC into ARM CF
+    }
     IFX(X_ZF|X_CF|X_OF|X_SF) {
         ADCSxw_REG(s1, s1, s2);
     } else {
@@ -1006,8 +1030,10 @@ void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         }
     }
     IFX(X_CF) {
-        CSETw(s3, cCS);
-        BFIw(xFlags, s3, F_CF, 1);
+        IFNATIVE(NF_CF) {} else {
+            CSETw(s3, cCS);
+            BFIw(xFlags, s3, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -1110,9 +1136,21 @@ void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     IFX(X_AF | X_OF) {
         MOVw_REG(s4, s1);
     }
-    MRS_nzvc(s3);
-    BFIx(s3, xFlags, 29, 1); // set C
-    MSR_nzvc(s3);      // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);  //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        MRS_nzcv(s3);
+        BFIx(s3, xFlags, 29, 1); // set C
+        MSR_nzcv(s3);      // load CC into ARM CF
+    }
     ADCw_REG(s1, s1, s2);
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -1164,9 +1202,21 @@ void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     IFX(X_AF | X_OF) {
         MOVw_REG(s4, s1);
     }
-    MRS_nzvc(s3);
-    BFIx(s3, xFlags, 29, 1); // set C
-    MSR_nzvc(s3);      // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);  //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        MRS_nzcv(s3);
+        BFIx(s3, xFlags, 29, 1); // set C
+        MSR_nzcv(s3);      // load CC into ARM CF
+    }
     ADCw_REG(s1, s1, s2);
     IFX(X_PEND) {
         STRw_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -1274,10 +1324,22 @@ void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     } else IFX(X_ALL) {
         SET_DFNONE(s3);
     }
-    EORw_mask(s4, xFlags, 0, 0);        // invert CC because it's reverted for SUB on ARM
-    MRS_nzvc(s3);
-    BFIx(s3, s4, 29, 1); // set C
-    MSR_nzvc(s3);      // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(!INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);  //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        EORw_mask(s4, xFlags, 0, 0);        // invert CC because it's reverted for SUB on ARM
+        MRS_nzcv(s3);
+        BFIx(s3, s4, 29, 1); // set C
+        MSR_nzcv(s3);      // load CC into ARM CF
+    }
     IFX(X_AF) {
         MVNxw_REG(s4, s1);
     }
@@ -1305,8 +1367,12 @@ void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     }
     IFX(X_CF) {
         // Inverted carry
-        CSETw(s3, cCC);
-        BFIw(xFlags, s3, F_CF, 1);
+        IFNATIVE(NF_CF) {
+            GEN_INVERTED_CARRY();
+        } else {
+            CSETw(s3, cCC);
+            BFIw(xFlags, s3, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -1408,10 +1474,22 @@ void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     } else IFX(X_ALL) {
         SET_DFNONE(s3);
     }
-    EORw_mask(s4, xFlags, 0, 0);        // invert CC because it's reverted for SUB on ARM
-    MRS_nzvc(s3);
-    BFIx(s3, s4, 29, 1); // set C
-    MSR_nzvc(s3);      // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(!INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);  //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        EORw_mask(s4, xFlags, 0, 0);        // invert CC because it's reverted for SUB on ARM
+        MRS_nzcv(s3);
+        BFIx(s3, s4, 29, 1); // set C
+        MSR_nzcv(s3);      // load CC into ARM CF
+    }
     IFX(X_AF|X_OF|X_CF) {
         MVNw_REG(s4, s1);
     }
@@ -1463,10 +1541,22 @@ void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     } else IFX(X_ALL) {
         SET_DFNONE(s3);
     }
-    EORw_mask(s4, xFlags, 0, 0);            // invert CC because it's reverted for SUB on ARM
-    MRS_nzvc(s3);
-    BFIx(s3, s4, 29, 1); // set C, bit 29
-    MSR_nzvc(s3);      // load CC into ARM CF
+    IFNATIVE_BEFORE(NF_CF) {
+        if(!INVERTED_CARRY_BEFORE) {
+            if(arm64_flagm)
+                CFINV();
+            else {
+                MRS_nzcv(s3);
+                EORx_mask(s3, s3, 1, 35, 0);  //mask=1<<NZCV_C
+                MSR_nzcv(s3);
+            }
+        }
+    } else {
+        EORw_mask(s4, xFlags, 0, 0);            // invert CC because it's reverted for SUB on ARM
+        MRS_nzcv(s3);
+        BFIx(s3, s4, 29, 1); // set C, bit 29
+        MSR_nzcv(s3);      // load CC into ARM CF
+    }
     IFX(X_AF|X_OF|X_CF) {
         MVNw_REG(s4, s1);
     }
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_tests.c b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
index 51c9f1bf..1b5184f4 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_tests.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
@@ -54,8 +54,12 @@ void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     }
     IFX(X_CF) {
         // inverted carry
-        CSETw(s4, cCC);
-        BFIw(xFlags, s4, F_CF, 1);
+        IFNATIVE(NF_CF) {
+            GEN_INVERTED_CARRY();
+        } else {
+            CSETw(s4, cCC);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
     }
     IFX(X_OF) {
         IFNATIVE(NF_VF) {} else {
@@ -89,17 +93,21 @@ void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int
     SUBSxw_U12(s3, s1, 0);   // res = s1 - 0
     // and now the tricky ones (and mostly unused), PF and AF
     // bc = (res & (~d | s)) | (~d & s) => is 0 here...
-    IFX(X_OF|X_AF|X_CF) {
-        IFXNATIVE(X_OF, NF_VF) {
-            IFX(X_AF|X_CF) {
-                MOV32w(s4, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s4);
-            }
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {
+            GEN_INVERTED_CARRY();
         } else {
-            MOV32w(s4, (1<<F_OF)|(1<<F_AF)|(1<<F_CF));
-            BICw(xFlags, xFlags, s4);
+            BFCw(xFlags, F_CF, 1);
         }
     }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
+        }
+    }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     IFX(X_ZF) {
         IFNATIVE(NF_EQ) {} else {
             CSETw(s4, cEQ);
@@ -253,17 +261,19 @@ void emit_test32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     } else {
         SET_DFNONE(s4);
     }
-    IFX(X_CF | X_AF | X_OF) {
-        IFXNATIVE(X_OF, NF_VF) {
-            IFX(X_AF|X_CF) {
-                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
-                BICw(xFlags, xFlags, s3);
-            }
-        } else {
-            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-            BICw(xFlags, xFlags, s3);
+    IFX(X_CF) {
+        IFNATIVE(NF_CF) {} else {
+            BFCw(xFlags, F_CF, 1);
         }
     }
+    IFX(X_OF) {
+        IFNATIVE(NF_VF) {} else {
+            BFCw(xFlags, F_OF, 1);
+        }
+    }
+    IFX(X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
     ANDSxw_REG(s3, s1, s2);   // res = s1 & s2
     IFX_PENDOR0 {
         STRxw_U12(s3, xEmu, offsetof(x64emu_t, res));
diff --git a/src/dynarec/arm64/dynarec_arm64_f30f.c b/src/dynarec/arm64/dynarec_arm64_f30f.c
index ce73f08f..752595fd 100644
--- a/src/dynarec/arm64/dynarec_arm64_f30f.c
+++ b/src/dynarec/arm64/dynarec_arm64_f30f.c
@@ -184,10 +184,10 @@ uintptr_t dynarec64_F30F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                     SETFLAGS(X_OF, SF_SUBSET);

                     GETED(0);

                     GETGD;

-                    MRS_nzvc(x3);

+                    MRS_nzcv(x3);

                     LSRw(x4, xFlags, F_OF);

                     BFIx(x3, x4, 29, 1); // set C

-                    MSR_nzvc(x3);      // load CC into ARM CF

+                    MSR_nzcv(x3);      // load CC into ARM CF

                     IFX(X_OF) {

                         ADCSxw_REG(gd, gd, ed);

                         CSETw(x3, cCS);

diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index bc41ebd9..e2456ee6 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -686,6 +686,14 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
         }
         if(dyn->insts[ninst].use_nat_flags || dyn->insts[ninst].set_nat_flags || dyn->insts[ninst].need_nat_flags)
             printf_log(LOG_NONE, " nf:%hhx/%hhx/%hhx", dyn->insts[ninst].set_nat_flags, dyn->insts[ninst].use_nat_flags, dyn->insts[ninst].need_nat_flags);
+        if(dyn->insts[ninst].invert_carry)
+            printf_log(LOG_NONE, " CI");
+        if(dyn->insts[ninst].gen_inverted_carry)
+            printf_log(LOG_NONE, " gic");
+        if(dyn->insts[ninst].before_nat_flags&NF_CF)
+            printf_log(LOG_NONE, " %ccb", dyn->insts[ninst].normal_carry_before?'n':'i');
+        if(dyn->insts[ninst].need_nat_flags&NF_CF)
+            printf_log(LOG_NONE, " %cc", dyn->insts[ninst].normal_carry?'n':'i');
         if(dyn->insts[ninst].pred_sz) {
             dynarec_log(LOG_NONE, ", pred=");
             for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)
@@ -806,15 +814,18 @@ int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st)
 }
 
 
-uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag)
+uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag, int before)
 {
-    if(dyn->insts[ninst].x64.set_flags) {
+    if(dyn->insts[ninst].x64.set_flags && !before) {
         dyn->insts[ninst].set_nat_flags |= flag;
         if(dyn->insts[ninst].x64.use_flags) {
             dyn->insts[ninst].use_nat_flags |= flag;
         }
     } else {
-        dyn->insts[ninst].use_nat_flags |= flag;
+        if(before)
+            dyn->insts[ninst].use_nat_flags_before |= flag;
+        else
+            dyn->insts[ninst].use_nat_flags |= flag;
     }
     return flag;
 }
@@ -826,6 +837,7 @@ uint8_t flag2native(uint8_t flags)
     if(flags&X_ZF) ret|=NF_EQ;
     if(flags&X_SF) ret|=NF_SF;
     if(flags&X_OF) ret|=NF_VF;
+    if(flags&X_CF) ret|=NF_CF;
     #else
     // no native flags on rv64 or la64
     #endif
@@ -834,140 +846,110 @@ uint8_t flag2native(uint8_t flags)
 
 int flagIsNative(uint8_t flags)
 {
-    if(flags&(X_AF|X_PF|X_CF)) return 0;
+    if(flags&(X_AF|X_PF)) return 0;
     return 1;
 }
 
-static int markNativeFlags(dynarec_native_t* dyn, int ninst, uint8_t flags, int start)
+static uint8_t getNativeFlagsUsed(dynarec_arm_t* dyn, int start, uint8_t flags)
 {
-    while(ninst>=0) {
-//printf_log(LOG_INFO, "markNativeFlags ninst=%d, flags=%x, start=%d, nat_flags_op=%d, need_nat_flag=%x, flag_gen=%x need_before=%x need_after=%x\n", ninst, flags, start, dyn->insts[ninst].nat_flags_op, dyn->insts[ninst].need_nat_flags, dyn->insts[ninst].x64.gen_flags, flag2native(dyn->insts[ninst].x64.need_before), flag2native(dyn->insts[ninst].x64.need_after));
-        // propagation already done
-        uint8_t flag_entry = (start && dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)?dyn->insts[ninst].before_nat_flags:dyn->insts[ninst].need_nat_flags;
-        if((flag_entry&flags)==flags) return flag_entry;
-        // no more flag propagation
-        if(!start && !flag2native(dyn->insts[ninst].x64.need_after)) return flags;
-        // flags destroyed, cancel native flags
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_UNUSABLE) return 0;
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_CANCELED) return 0;
-        if(!flagIsNative(dyn->insts[ninst].x64.use_flags))  return 0;
-        if(start) {
-            start = 0;
-            flags |= flag2native(dyn->insts[ninst].x64.need_before);
-        } else if(dyn->insts[ninst].x64.gen_flags && (flag2native(dyn->insts[ninst].x64.gen_flags)&flags)) {
-            // this is the emitter of the native flags! so, is it good or not?
-            if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && (dyn->insts[ninst].set_nat_flags&flags)==flags) {
-                dyn->insts[ninst].need_nat_flags |= flags;
-                if(!dyn->insts[ninst].x64.may_set)  // if flags just may be set, continue!
-                    return flags;
-            } else
-                return 0;
-        }
-        if(dyn->insts[ninst].use_nat_flags)
-            flags |= dyn->insts[ninst].use_nat_flags;
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)   // can happens on operation that read and generate flags
-            dyn->insts[ninst].before_nat_flags |= flags;
-        else
-            dyn->insts[ninst].need_nat_flags |= flags;
-        flags |= flag2native(dyn->insts[ninst].x64.need_before);
-        if(!dyn->insts[ninst].pred_sz)
+    // propagate and check which flags are actually used
+    uint8_t used_flags = 0;
+    int ninst = start;
+    while(ninst<dyn->size) {
+//printf_log(LOG_INFO, "getNativeFlagsUsed ninst:%d/%d, flags=%x, used_flags=%x, nat_flags_op_before:%x, nat_flags_op:%x, need_after:%x\n", ninst, start, flags, used_flags, dyn->insts[ninst].nat_flags_op_before, dyn->insts[ninst].nat_flags_op, flag2native(dyn->insts[ninst].x64.need_after));
+        // check if this is an opcode that generate flags but consume flags before
+        if(dyn->insts[ninst].nat_flags_op_before)
             return 0;
-        for(int i=1; i<dyn->insts[ninst].pred_sz; ++i) {
-            int ret_flags = markNativeFlags(dyn, dyn->insts[ninst].pred[i], flags, 0);
-            if(!ret_flags)
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && dyn->insts[ninst].use_nat_flags_before)
+            used_flags|=dyn->insts[ninst].use_nat_flags_before&flags;
+        // if the opcode generate flags, return
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && (start!=ninst)) {
+            if(used_flags&~dyn->insts[ninst].set_nat_flags)    // check partial changes that would destroy flag state
                 return 0;
-            flags|=ret_flags;
+            return used_flags;
         }
-        ninst = dyn->insts[ninst].pred[0];
-    }
-    return 0;
-}
-
-static void unmarkNativeFlags(dynarec_native_t* dyn, int ninst, int start)
-{
-//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, will check forward the real start\n", ninst);
-    // need to check if branch also goes forward to really start from the beggining
-    while((ninst<dyn->size) && dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].nat_flags_op && dyn->insts[ninst+1].before_nat_flags)
-        ninst++;
-
-    while(ninst>=0) {
-//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, start=%d\n", ninst, start);
-        // no more flag propagation
-        if(!start && !flag2native(dyn->insts[ninst].x64.need_after)) return;
-        // flags destroyed, but maybe it's be used
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_UNUSABLE) return;
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_CANCELED) return;
-        if(start)
-            start = 0;
-        else if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) {
-            if(!dyn->insts[ninst].x64.may_set) {
-                dyn->insts[ninst].need_nat_flags = 0;
-                dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_CANCELED;
-                return;
-            }
+        // check if there is a callret barrier
+        if(dyn->insts[ninst].x64.has_callret)
+            return 0;
+        // check if flags are still needed
+        if(!(flag2native(dyn->insts[ninst].x64.need_after)&flags))
+            return used_flags;
+        // check if flags are destroyed, cancel the use then
+        if(dyn->insts[ninst].nat_flags_op && (start!=ninst))
+            return 0;
+        // check if flags are generated without native option
+        if((start!=ninst) && dyn->insts[ninst].x64.gen_flags && (flag2native(dyn->insts[ninst].x64.gen_flags&dyn->insts[ninst].x64.need_after)&used_flags)) {
+            if(used_flags&~flag2native(dyn->insts[ninst].x64.gen_flags&dyn->insts[ninst].x64.need_after))
+                return 0;   // partial covert, not supported for now (TODO: this might be fixable)
+            else
+                return used_flags;  // full covert... End of propagation
         }
-        dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_CANCELED;
-        #if 0
-        // check forward
-        if(dyn->insts[ninst].x64.has_next && dyn->insts[ninst+1].need_nat_flags)
-            unmarkNativeFlags(dyn, ninst+1, 1);
-        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1) {
+        // update used flags
+        used_flags |= (flag2native(dyn->insts[ninst].x64.need_after)&flags);
+        // go next
+        if(!dyn->insts[ninst].x64.has_next) {
+            // check if it's a jump to an opcode with only 1 preds, then just follow the jump
             int jmp = dyn->insts[ninst].x64.jmp_insts;
-            if(dyn->insts[jmp].need_nat_flags)
-                unmarkNativeFlags(dyn, jmp, 1);
-        }
-        #endif
-        // check if stop
-        if(((dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)?dyn->insts[ninst].before_nat_flags:dyn->insts[ninst].need_nat_flags)==0)
-                return;
-        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)   // can happens on operation that read and generate flags
-            dyn->insts[ninst].before_nat_flags = 0;
-        else
-            dyn->insts[ninst].need_nat_flags = 0;
-        if(!flag2native(dyn->insts[ninst].x64.need_before)) return;
-        if(!dyn->insts[ninst].pred_sz)
-            return;
-        for(int i=1; i<dyn->insts[ninst].pred_sz; ++i)
-            unmarkNativeFlags(dyn, dyn->insts[ninst].pred[i], 0);
-        if(!dyn->insts[ninst].x64.has_next)
-            return;
-        ninst = dyn->insts[ninst].pred[0];
-    }
-}
-
-static void propagateNativeFlags(dynarec_native_t* dyn, int ninst)
-{
-    uint8_t flags = dyn->insts[ninst].use_nat_flags&flag2native(dyn->insts[ninst].x64.need_before);
-    uint8_t flags_after = flag2native(dyn->insts[ninst].x64.need_after);
-    int marked_flags = markNativeFlags(dyn, ninst, flags, 1);
-    if(!marked_flags) {
-//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, because marked_flags is 0\n", ninst);
-        unmarkNativeFlags(dyn, ninst, 1);
-        return;
+            if(dyn->insts[ninst].x64.jmp && (jmp!=-1) && (getNominalPred(dyn, jmp)==ninst))
+                ninst = jmp;
+            else
+                return used_flags;
+        } else
+            ++ninst;
     }
-    uint8_t need_flags;
-    // check if all next have the correct flag, or if using non-native flags while native are used
-    if(dyn->insts[ninst].x64.has_next && (flags_after&marked_flags)) {
-        need_flags = dyn->insts[ninst+1].nat_flags_op?dyn->insts[ninst+1].before_nat_flags:dyn->insts[ninst+1].need_nat_flags;    // native flags used
-        flags_after = flag2native(dyn->insts[ninst+1].x64.need_before)&~need_flags; // flags that are needs to be x86
-        if((need_flags&~marked_flags) || (!need_flags && (flags_after&marked_flags))) {
-//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, because: need_flags=%hhx, flag_after=%hhx, marked_flags=%hhx\n", ninst, need_flags, flags_after, marked_flags);
-            unmarkNativeFlags(dyn, ninst, 1);
+    return used_flags;
+}
+
+static void propagateNativeFlags(dynarec_arm_t* dyn, int start)
+{
+    int ninst = start;
+    // those are the flags generated by the opcode and used later on
+    uint8_t flags = dyn->insts[ninst].set_nat_flags&flag2native(dyn->insts[ninst].x64.need_after);
+    //check if they are actualy used before starting
+//printf_log(LOG_INFO, "propagateNativeFlags called for start=%d, flags=%x, will need:%x\n", start, flags, flag2native(dyn->insts[ninst].x64.need_after));
+    if(!flags) return;
+    // also check if some native flags are used but not genereated here
+    if(flag2native(dyn->insts[ninst].x64.need_after)&~flags) return;
+    uint8_t used_flags = getNativeFlagsUsed(dyn, start, flags);
+//printf_log(LOG_INFO, " will use:%x, carry:%d, generate inverted carry:%d\n", used_flags, used_flags&NF_CF, dyn->insts[ninst].gen_inverted_carry);
+    if(!used_flags) return; // the flags wont be used, so just cancel
+    int nc = dyn->insts[ninst].gen_inverted_carry?0:1;
+    int carry = used_flags&NF_CF;
+    // propagate
+    while(ninst<dyn->size) {
+        // check if this is an opcode that generate flags but consume flags before
+        if((start!=ninst) && dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) {
+            if(dyn->insts[ninst].use_nat_flags_before) {
+                dyn->insts[ninst].before_nat_flags |= used_flags;
+                if(carry) dyn->insts[ninst].normal_carry_before = nc;
+            }
+            // if the opcode generate flags, return
             return;
         }
-    }
-    #if 0
-    // check at jump point, as native flags are not converted
-    int jmp = dyn->insts[ninst].x64.jmp_insts;
-    if(dyn->insts[ninst].x64.jmp && jmp!=-1) {
-        need_flags = dyn->insts[jmp].need_nat_flags;
-        flags_after = flag2native(dyn->insts[jmp].x64.need_before);
-        if(((need_flags&flags_after)!=need_flags) || (!need_flags && (flags_after&marked_flags))) {
-            unmarkNativeFlags(dyn, ninst, 1);
+        // check if flags are generated without native option
+        if((start!=ninst) && dyn->insts[ninst].x64.gen_flags && (flag2native(dyn->insts[ninst].x64.gen_flags&dyn->insts[ninst].x64.need_after)&used_flags))
             return;
-        }
+        // mark the opcode
+        uint8_t use_flags = flag2native(dyn->insts[ninst].x64.need_before|dyn->insts[ninst].x64.need_after);
+        if(dyn->insts[ninst].x64.use_flags) use_flags |= flag2native(dyn->insts[ninst].x64.use_flags);  // should not change anything
+//printf_log(LOG_INFO, " marking ninst=%d with %x | %x&%x => %x\n", ninst, dyn->insts[ninst].need_nat_flags, used_flags, use_flags, dyn->insts[ninst].need_nat_flags | (used_flags&use_flags));
+        dyn->insts[ninst].need_nat_flags |= used_flags&use_flags;
+        if(carry) dyn->insts[ninst].normal_carry = nc;
+        if(carry && dyn->insts[ninst].invert_carry) nc = 0;
+        // check if flags are still needed
+        if(!(flag2native(dyn->insts[ninst].x64.need_after)&used_flags))
+            return;
+        // go next
+        if(!dyn->insts[ninst].x64.has_next) {
+            // check if it's a jump to an opcode with only 1 preds, then just follow the jump
+            int jmp = dyn->insts[ninst].x64.jmp_insts;
+            if(dyn->insts[ninst].x64.jmp && (jmp!=-1) && (getNominalPred(dyn, jmp)==ninst))
+                ninst = jmp;
+            else
+                return;
+        } else
+            ++ninst;
     }
-    #endif
 }
 
 void updateNatveFlags(dynarec_native_t* dyn)
@@ -975,8 +957,8 @@ void updateNatveFlags(dynarec_native_t* dyn)
     if(!box64_dynarec_nativeflags)
         return;
     // backward check if native flags are used
-    for(int ninst=dyn->size-1; ninst>=0; --ninst)
-        if(dyn->insts[ninst].use_nat_flags) {
+    for(int ninst=0; ninst<dyn->size; ++ninst)
+        if(flag2native(dyn->insts[ninst].x64.gen_flags) && (dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)) {
             propagateNativeFlags(dyn, ninst);
         }
 }
@@ -996,11 +978,19 @@ int nativeFlagsNeedsTransform(dynarec_arm_t* dyn, int ninst)
     if(dyn->insts[ninst].set_nat_flags)
         return 0;
     uint8_t flags_before = dyn->insts[ninst].need_nat_flags;
+    uint8_t nc_before = dyn->insts[ninst].normal_carry;
+    if(dyn->insts[ninst].invert_carry)
+        nc_before = 0;
     uint8_t flags_after = dyn->insts[jmp].need_nat_flags;
-    if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH)
+    uint8_t nc_after = dyn->insts[jmp].normal_carry;
+    if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH) {
         flags_after = dyn->insts[jmp].before_nat_flags;
+        nc_after = dyn->insts[jmp].normal_carry_before;
+    }
     uint8_t flags_x86 = flag2native(dyn->insts[jmp].x64.need_before);
     flags_x86 &= ~flags_after;
+    if((flags_before&NF_CF) && (flags_after&NF_CF) && (nc_before!=nc_after))
+        return 1;
     // all flags_after should be present and none remaining flags_x86 
     if(((flags_before&flags_after)!=flags_after) || (flags_before&flags_x86))
         return 1;
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h
index 446c1cb1..b17a5bf1 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.h
+++ b/src/dynarec/arm64/dynarec_arm64_functions.h
@@ -54,7 +54,7 @@ int neoncache_no_i64(dynarec_arm_t* dyn, int ninst, int st, int a);
 // transform x86 flags to native flags
 uint8_t flag2native(uint8_t flags);
 // mark a instruction as using/generating flags. return flag
-uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag);
+uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag, int before);
 // propage the use of nativeflags or not (done between step 0 and step 1)
 void updateNatveFlags(dynarec_arm_t* dyn);
 // raz arm speicifc state when an opcode is unused
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index 38eacb2b..7a29562b 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -2383,7 +2383,11 @@ static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst, int s1)
             j64 = (GETMARKF2)-(dyn->native_size);
             CBZw(s1, j64);
         }
-        CALL_(UpdateFlags, -1, 0);
+        if(dyn->insts[ninst].need_nat_flags)
+            MRS_nzcv(s1);
+        CALL_(UpdateFlags, -1, s1);
+        if(dyn->insts[ninst].need_nat_flags)
+            MSR_nzcv(s1);
         MARKF2;
     }
 }
@@ -2395,12 +2399,18 @@ static void nativeFlagsTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2)
     if(jmp<0)
         return;
     uint8_t flags_before = dyn->insts[ninst].need_nat_flags;
+    uint8_t nc_before = dyn->insts[ninst].normal_carry;
+    if(dyn->insts[ninst].invert_carry)
+        nc_before = 0;
     uint8_t flags_after = dyn->insts[jmp].need_nat_flags;
-    if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH)
+    uint8_t nc_after = dyn->insts[jmp].normal_carry;
+    if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH) {
         flags_after = dyn->insts[jmp].before_nat_flags;
+        nc_after = dyn->insts[jmp].normal_carry_before;
+    }
     uint8_t flags_x86 = flag2native(dyn->insts[jmp].x64.need_before);
     flags_x86 &= ~flags_after;
-    MESSAGE(LOG_DUMP, "\tFNative flags transform ---- ninst=%d -> %d %hhx -> %hhx/%hhx\n", ninst, jmp, flags_before, flags_after, flags_x86);
+    MESSAGE(LOG_DUMP, "\tNative Flags transform ---- ninst=%d -> %d %hhx -> %hhx/%hhx\n", ninst, jmp, flags_before, flags_after, flags_x86);
     // flags present in before and missing in after
     if((flags_before&NF_EQ) && (flags_x86&NF_EQ)) {
         CSETw(s1, cEQ);
@@ -2414,28 +2424,50 @@ static void nativeFlagsTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2)
         CSETw(s1, cVS);
         BFIw(xFlags, s1, F_OF, 1);
     }
+    if((flags_before&NF_CF) && (flags_x86&NF_CF)) {
+        if(nc_before) // might need to invert carry
+            CSETw(s1, cCS);
+        else
+            CSETw(s1, cCC);
+        BFIw(xFlags, s1, F_CF, 1);
+    }
     // flags missing and needed later
     int mrs = 0;
-    #define GO_MRS(A)   if(!mrs) {mrs=1; MRS_nzvc(s2); }
+    #define GO_MRS(A)   if(!mrs) {mrs=1; MRS_nzcv(s2); }
     if(!(flags_before&NF_EQ) && (flags_after&NF_EQ)) {
         GO_MRS(s2);
-        BFIw(s1, xFlags, F_ZF, 1);
+        UBFXw(s1, xFlags, F_ZF, 1);
         BFIx(s2, s1, NZCV_Z, 1);
     }
     if(!(flags_before&NF_SF) && (flags_after&NF_SF)) {
         GO_MRS(s2);
-        BFIw(s1, xFlags, F_SF, 1);
+        UBFXw(s1, xFlags, F_SF, 1);
         BFIx(s2, s1, NZCV_N, 1);
     }
     if(!(flags_before&NF_VF) && (flags_after&NF_VF)) {
         GO_MRS(s2);
-        BFIw(s1, xFlags, F_OF, 1);
+        UBFXw(s1, xFlags, F_OF, 1);
         BFIx(s2, s1, NZCV_V, 1);
     }
+    if(!(flags_before&NF_CF) && (flags_after&NF_CF)) {
+        GO_MRS(s2);
+        BFIx(s2, xFlags, NZCV_C, 1);    // F_CF is bit 0
+        if(!nc_after)
+            EORx_mask(s2, s2, 1, 35, 0);    //mask=1<<NZCV_C
+    }
+    // special case for NF_CF changing state
+    if((flags_before&NF_CF) && (flags_after&NF_CF) && (nc_before!=nc_after)) {
+        if(arm64_flagm && !mrs) {
+            CFINV();
+        } else {
+            GO_MRS(s2);
+            EORx_mask(s2, s2, 1, 35, 0);  //mask=1<<NZCV_C
+        }
+    }
     #undef GL_MRS
-    if(mrs) MSR_nzvc(s2);
+    if(mrs) MSR_nzcv(s2);
 
-    MESSAGE(LOG_DUMP, "\tF---- Native flags transform\n");
+    MESSAGE(LOG_DUMP, "\t---- Native Flags transform\n");
 }
 
 void CacheTransform(dynarec_arm_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) {
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index dc4ad69b..dbf04ed6 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -910,6 +910,7 @@
 #ifndef IFNATIVE
 #define IFNATIVE(A)     if(dyn->insts[ninst].need_nat_flags&(A))
 #define IFNATIVEN(A)    if((dyn->insts[ninst].need_nat_flags&(A))==(A))
+#define IFNATIVE_BEFORE(A)     if(dyn->insts[ninst].before_nat_flags&(A))
 #endif
 
 #ifndef IFX
@@ -921,6 +922,16 @@
 #define IFXN(A, B)  if((dyn->insts[ninst].x64.gen_flags&(A) && !(dyn->insts[ninst].x64.gen_flags&(B))))
 #define IFXNATIVE(X, N) if((dyn->insts[ninst].x64.gen_flags&(X)) && (dyn->insts[ninst].need_nat_flags&(N)))
 #endif
+#ifndef INVERTED_CARRY
+#define INVERTED_CARRY          !dyn->insts[ninst].normal_carry
+#define INVERTED_CARRY_BEFORE   !dyn->insts[ninst].normal_carry_before
+#endif
+#ifndef GEN_INVERTED_CARRY
+#define GEN_INVERTED_CARRY()
+#endif
+#ifndef INVERT_CARRY
+#define INVERT_CARRY(A)     if(dyn->insts[ninst].normal_carry) {if(arm64_flagm) CFINV(); else {MRS_nzcv(A); EORx_mask(A, A, 1, 35, 0); MSR_nzcv(A);}}
+#endif
 
 // Generate FCOM with s1 and s2 scratch regs (the VCMP is already done)
 #define FCOM(s1, s2, s3)                                                    \
@@ -1711,13 +1722,29 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
         break;                                              \
     case B+0x2:                                             \
         INST_NAME(T1 "C " T2);                              \
+        IFNATIVE(NF_CF) {                                   \
+            if(INVERTED_CARRY) {                            \
+                GO( , cCS, cCC, X_CF)                       \
+            } else {                                        \
+                GO( , cCC, cCS, X_CF)                       \
+            }                                               \
+        } else {                                            \
         GO( TSTw_mask(xFlags, 0, 0)                         \
             , cEQ, cNE, X_CF)                               \
+        }                                                   \
         break;                                              \
     case B+0x3:                                             \
         INST_NAME(T1 "NC " T2);                             \
+        IFNATIVE(NF_CF) {                                   \
+            if(INVERTED_CARRY) {                            \
+                GO( , cCC, cCS, X_CF)                       \
+            } else {                                        \
+                GO( , cCS, cCC, X_CF)                       \
+            }                                               \
+        } else {                                            \
         GO( TSTw_mask(xFlags, 0, 0)                         \
             , cNE, cEQ, X_CF)                               \
+        }                                                   \
         break;                                              \
     case B+0x4:                                             \
         INST_NAME(T1 "Z " T2);                              \
@@ -1739,15 +1766,25 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
         break;                                              \
     case B+0x6:                                             \
         INST_NAME(T1 "BE " T2);                             \
+        IFNATIVEN(NF_EQ|NF_CF) {                            \
+            INVERT_CARRY(x1);                               \
+            GO( , cHI, cLS, X_ZF|X_CF)                      \
+        } else {                                            \
         GO( MOV32w(x1, (1<<F_CF)|(1<<F_ZF));                \
             TSTw_REG(xFlags, x1)                            \
             , cEQ, cNE, X_CF|X_ZF)                          \
+        }                                                   \
         break;                                              \
     case B+0x7:                                             \
         INST_NAME(T1 "NBE " T2);                            \
+        IFNATIVEN(NF_EQ|NF_CF) {                            \
+            INVERT_CARRY(x1);                               \
+            GO( , cLS, cHI, X_ZF|X_CF)                      \
+        } else {                                            \
         GO( MOV32w(x1, (1<<F_CF)|(1<<F_ZF));                \
             TSTw_REG(xFlags, x1)                            \
             , cNE, cEQ, X_CF|X_ZF)                          \
+        }                                                   \
         break;                                              \
     case B+0x8:                                             \
         INST_NAME(T1 "S " T2);                              \
diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h
index d03223ed..4429aefb 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass0.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass0.h
@@ -51,12 +51,16 @@
         }
 
 #define FEMIT(A)        dyn->insts[ninst].nat_flags_op = dyn->insts[ninst].x64.set_flags?NAT_FLAG_OP_TOUCH:NAT_FLAG_OP_UNUSABLE
-#define IFNATIVE(A)     if(mark_natflag(dyn, ninst, A))
-#define IFNATIVEN(A)    if(mark_natflag(dyn, ninst, A))
+#define IFNATIVE(A)     if(mark_natflag(dyn, ninst, A, 0))
+#define IFNATIVEN(A)    if(mark_natflag(dyn, ninst, A, 0))
 #define IFX(A)  if((dyn->insts[ninst].x64.set_flags&(A)))
 #define IFX2(A, B)  if((dyn->insts[ninst].x64.set_flags&(A)) B)
 #define IFX_PENDOR0  if((dyn->insts[ninst].x64.set_flags&(X_PEND) || !dyn->insts[ninst].x64.set_flags))
 #define IFXX(A) if((dyn->insts[ninst].x64.set_flags==(A)))
 #define IFX2X(A, B) if((dyn->insts[ninst].x64.set_flags==(A) || dyn->insts[ninst].x64.set_flags==(B) || dyn->insts[ninst].x64.set_flags==((A)|(B))))
 #define IFXN(A, B)  if((dyn->insts[ninst].x64.set_flags&(A) && !(dyn->insts[ninst].x64.set_flags&(B))))
-#define IFXNATIVE(X, N)  if((dyn->insts[ninst].x64.set_flags&(X)) && mark_natflag(dyn, ninst, N))
\ No newline at end of file
+#define IFXNATIVE(X, N)  if((dyn->insts[ninst].x64.set_flags&(X)) && mark_natflag(dyn, ninst, N, 0))
+#define GEN_INVERTED_CARRY()        dyn->insts[ninst].gen_inverted_carry = 1
+#define IFNATIVE_BEFORE(A)     if(mark_natflag(dyn, ninst, A, 1))
+#define INVERT_CARRY(A) dyn->insts[ninst].invert_carry = 1
+#define INVERT_CARRY_BEFORE(A) dyn->insts[ninst].invert_carry_before = 1
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 1ea9f658..b010d4ed 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -12,6 +12,7 @@ typedef struct instsize_s instsize_t;
 #define NF_EQ   (1<<0)
 #define NF_SF   (1<<1)
 #define NF_VF   (1<<2)
+#define NF_CF   (1<<3)
 
 // Nothing happens to the native flags
 #define NAT_FLAG_OP_NONE        0
@@ -109,9 +110,15 @@ typedef struct instruction_arm64_s {
     uint8_t             last_write;
     uint8_t             set_nat_flags;  // 0 or combinaison of native flags define
     uint8_t             use_nat_flags;  // 0 or combinaison of native flags define
-    uint8_t             nat_flags_op;// what happens to native flags here
+    uint8_t             use_nat_flags_before;  // 0 or combinaison of native flags define
+    uint8_t             nat_flags_op:4;// what happens to native flags here
+    uint8_t             nat_flags_op_before:4;// what happens to native flags here
     uint8_t             before_nat_flags;  // 0 or combinaison of native flags define
     uint8_t             need_nat_flags;
+    unsigned            gen_inverted_carry:1;
+    unsigned            normal_carry:1;
+    unsigned            normal_carry_before:1;
+    unsigned            invert_carry:1; // this opcode force an inverted carry
     flagcache_t         f_exit;     // flags status at end of instruction
     neoncache_t         n;          // neoncache at end of instruction (but before poping)
     flagcache_t         f_entry;    // flags status before the instruction begin
@@ -172,11 +179,11 @@ void CreateJmpNext(void* addr, void* next);
 #define GO_TRACE(A, B, s0)  \
     GETIP(addr);            \
     MOVx_REG(x1, xRIP);     \
-    MRS_nzvc(s0);           \
+    MRS_nzcv(s0);           \
     STORE_XEMU_CALL(xRIP);  \
     MOV32w(x2, B);          \
     CALL_(A, -1, s0);       \
-    MSR_nzvc(s0);           \
+    MSR_nzcv(s0);           \
     LOAD_XEMU_CALL(xRIP)
 
 #endif //__DYNAREC_ARM_PRIVATE_H_