Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/arm64/arm64_emitter.h              96
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_0f.c           12
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_emit_logic.c  220
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_emit_math.c   172
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_emit_shift.c  199
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_emit_tests.c   76
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_functions.c   201
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_functions.h    10
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.c       58
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_helper.h       58
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_pass0.h        13
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_private.h      22
-rw-r--r--  src/dynarec/dynarec_arch.h                     15
-rw-r--r--  src/dynarec/dynarec_native.c                    2
14 files changed, 897 insertions(+), 257 deletions(-)
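
The theme of this patch: let the ARM64 dynarec keep x86 ZF/SF/OF in the native PSTATE NZCV flags instead of immediately materializing them into the emulated xFlags register. Flag-setting emitter macros are switched from EMIT to FEMIT, per-instruction metadata records who produces and who consumes native flags, and the flag-materialization blocks are wrapped in IFNATIVE guards. Only three x86 flags have a direct NZCV counterpart, as the new helper (reproduced, condensed, from the dynarec_arm64_functions.c hunk further down) shows:

uint8_t flag2native(uint8_t flags)
{
    uint8_t ret = 0;
    if(flags&X_ZF) ret|=NF_EQ;  // x86 ZF <-> ARM64 Z (PSTATE bit 30, NZCV_Z)
    if(flags&X_SF) ret|=NF_SF;  // x86 SF <-> ARM64 N (PSTATE bit 31, NZCV_N)
    if(flags&X_OF) ret|=NF_VF;  // x86 OF <-> ARM64 V (PSTATE bit 28, NZCV_V)
    return ret;                 // CF, AF and PF always need materializing
}
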
diff --git a/src/dynarec/arm64/arm64_emitter.h b/src/dynarec/arm64/arm64_emitter.h
index 4dc681e7..f5f0cc96 100644
--- a/src/dynarec/arm64/arm64_emitter.h
+++ b/src/dynarec/arm64/arm64_emitter.h
@@ -178,35 +178,35 @@ int convert_bitmask(uint64_t bitmask);
 // ADD / SUB
 #define ADDSUB_REG_gen(sf, op, S, shift, Rm, imm6, Rn, Rd) ((sf)<<31 | (op)<<30 | (S)<<29 | 0b01011<<24 | (shift)<<22 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd))
 #define ADDx_REG(Rd, Rn, Rm)                EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, 0, Rn, Rd))
-#define ADDSx_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(1, 0, 1, 0b00, Rm, 0, Rn, Rd))
+#define ADDSx_REG(Rd, Rn, Rm)              FEMIT(ADDSUB_REG_gen(1, 0, 1, 0b00, Rm, 0, Rn, Rd))
 #define ADDx_REG_LSL(Rd, Rn, Rm, lsl)       EMIT(ADDSUB_REG_gen(1, 0, 0, 0b00, Rm, lsl, Rn, Rd))
 #define ADDw_REG(Rd, Rn, Rm)                EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, 0, Rn, Rd))
-#define ADDSw_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd))
+#define ADDSw_REG(Rd, Rn, Rm)              FEMIT(ADDSUB_REG_gen(0, 0, 1, 0b00, Rm, 0, Rn, Rd))
 #define ADDw_REG_LSL(Rd, Rn, Rm, lsl)       EMIT(ADDSUB_REG_gen(0, 0, 0, 0b00, Rm, lsl, Rn, Rd))
 #define ADDxw_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b00, Rm, 0, Rn, Rd))
 #define ADDz_REG(Rd, Rn, Rm)                EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 0, 0, 0b00, Rm, 0, Rn, Rd))
-#define ADDSxw_REG(Rd, Rn, Rm)              EMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd))
+#define ADDSxw_REG(Rd, Rn, Rm)             FEMIT(ADDSUB_REG_gen(rex.w, 0, 1, 0b00, Rm, 0, Rn, Rd))
 #define ADDxw_REG_LSR(Rd, Rn, Rm, lsr)      EMIT(ADDSUB_REG_gen(rex.w, 0, 0, 0b01, Rm, lsr, Rn, Rd))
 
 #define ADDSUB_IMM_gen(sf, op, S, shift, imm12, Rn, Rd)    ((sf)<<31 | (op)<<30 | (S)<<29 | 0b10001<<24 | (shift)<<22 | (imm12)<<10 | (Rn)<<5 | (Rd))
 #define ADDx_U12(Rd, Rn, imm12)     EMIT(ADDSUB_IMM_gen(1, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd))
-#define ADDSx_U12(Rd, Rn, imm12)    EMIT(ADDSUB_IMM_gen(1, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd))
+#define ADDSx_U12(Rd, Rn, imm12)   FEMIT(ADDSUB_IMM_gen(1, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd))
 #define ADDw_U12(Rd, Rn, imm12)     EMIT(ADDSUB_IMM_gen(0, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd))
-#define ADDSw_U12(Rd, Rn, imm12)    EMIT(ADDSUB_IMM_gen(0, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd))
+#define ADDSw_U12(Rd, Rn, imm12)   FEMIT(ADDSUB_IMM_gen(0, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd))
 #define ADDxw_U12(Rd, Rn, imm12)    EMIT(ADDSUB_IMM_gen(rex.w, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd))
-#define ADDSxw_U12(Rd, Rn, imm12)   EMIT(ADDSUB_IMM_gen(rex.w, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd))
+#define ADDSxw_U12(Rd, Rn, imm12)  FEMIT(ADDSUB_IMM_gen(rex.w, 0, 1, 0b00, (imm12)&0xfff, Rn, Rd))
 #define ADDz_U12(Rd, Rn, imm12)     EMIT(ADDSUB_IMM_gen(rex.is32bits?0:1, 0, 0, 0b00, (imm12)&0xfff, Rn, Rd))
 
 #define SUBx_REG(Rd, Rn, Rm)                EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, 0, Rn, Rd))
-#define SUBSx_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd))
+#define SUBSx_REG(Rd, Rn, Rm)              FEMIT(ADDSUB_REG_gen(1, 1, 1, 0b00, Rm, 0, Rn, Rd))
 #define SUBx_REG_LSL(Rd, Rn, Rm, lsl)       EMIT(ADDSUB_REG_gen(1, 1, 0, 0b00, Rm, lsl, Rn, Rd))
 #define SUBw_REG(Rd, Rn, Rm)                EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, 0, Rn, Rd))
 #define SUBw_REG_LSL(Rd, Rn, Rm, lsl)       EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, lsl, Rn, Rd))
-#define SUBSw_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd))
-#define SUBSw_REG_LSL(Rd, Rn, Rm, lsl)      EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, lsl, Rn, Rd))
+#define SUBSw_REG(Rd, Rn, Rm)              FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd))
+#define SUBSw_REG_LSL(Rd, Rn, Rm, lsl)     FEMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, lsl, Rn, Rd))
 #define SUBxw_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(rex.w, 1, 0, 0b00, Rm, 0, Rn, Rd))
 #define SUBz_REG(Rd, Rn, Rm)                EMIT(ADDSUB_REG_gen(rex.is32bits?0:1, 1, 0, 0b00, Rm, 0, Rn, Rd))
-#define SUBSxw_REG(Rd, Rn, Rm)              EMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd))
+#define SUBSxw_REG(Rd, Rn, Rm)             FEMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd))
 #define CMPSx_REG(Rn, Rm)                   SUBSx_REG(xZR, Rn, Rm)
 #define CMPSw_REG(Rn, Rm)                   SUBSw_REG(wZR, Rn, Rm)
 #define CMPSxw_REG(Rn, Rm)                  SUBSxw_REG(xZR, Rn, Rm)
@@ -218,12 +218,12 @@ int convert_bitmask(uint64_t bitmask);
 #define NEGSxw_REG(Rd, Rm)                  SUBSxw_REG(Rd, xZR, Rm);
 
 #define SUBx_U12(Rd, Rn, imm12)     EMIT(ADDSUB_IMM_gen(1, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd))
-#define SUBSx_U12(Rd, Rn, imm12)    EMIT(ADDSUB_IMM_gen(1, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd))
+#define SUBSx_U12(Rd, Rn, imm12)   FEMIT(ADDSUB_IMM_gen(1, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd))
 #define SUBw_U12(Rd, Rn, imm12)     EMIT(ADDSUB_IMM_gen(0, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd))
-#define SUBSw_U12(Rd, Rn, imm12)    EMIT(ADDSUB_IMM_gen(0, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd))
+#define SUBSw_U12(Rd, Rn, imm12)   FEMIT(ADDSUB_IMM_gen(0, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd))
 #define SUBxw_U12(Rd, Rn, imm12)    EMIT(ADDSUB_IMM_gen(rex.w, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd))
 #define SUBz_U12(Rd, Rn, imm12)     EMIT(ADDSUB_IMM_gen(rex.is32bits?0:1, 1, 0, 0b00, (imm12)&0xfff, Rn, Rd))
-#define SUBSxw_U12(Rd, Rn, imm12)   EMIT(ADDSUB_IMM_gen(rex.w, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd))
+#define SUBSxw_U12(Rd, Rn, imm12)  FEMIT(ADDSUB_IMM_gen(rex.w, 1, 1, 0b00, (imm12)&0xfff, Rn, Rd))
 #define CMPSx_U12(Rn, imm12)        SUBSx_U12(xZR, Rn, imm12)
 #define CMPSw_U12(Rn, imm12)        SUBSw_U12(wZR, Rn, imm12)
 #define CMPSxw_U12(Rn, imm12)       SUBSxw_U12(xZR, Rn, imm12)
@@ -235,18 +235,18 @@ int convert_bitmask(uint64_t bitmask);
 #define SBCx_REG(Rd, Rn, Rm)        EMIT(ADDSUBC_gen(1, 1, 0, Rm, Rn, Rd))
 #define SBCw_REG(Rd, Rn, Rm)        EMIT(ADDSUBC_gen(0, 1, 0, Rm, Rn, Rd))
 #define SBCxw_REG(Rd, Rn, Rm)       EMIT(ADDSUBC_gen(rex.w, 1, 0, Rm, Rn, Rd))
-#define ADCSx_REG(Rd, Rn, Rm)       EMIT(ADDSUBC_gen(1, 0, 1, Rm, Rn, Rd))
-#define ADCSw_REG(Rd, Rn, Rm)       EMIT(ADDSUBC_gen(0, 0, 1, Rm, Rn, Rd))
-#define ADCSxw_REG(Rd, Rn, Rm)      EMIT(ADDSUBC_gen(rex.w, 0, 1, Rm, Rn, Rd))
-#define SBCSx_REG(Rd, Rn, Rm)       EMIT(ADDSUBC_gen(1, 1, 1, Rm, Rn, Rd))
-#define SBCSw_REG(Rd, Rn, Rm)       EMIT(ADDSUBC_gen(0, 1, 1, Rm, Rn, Rd))
-#define SBCSxw_REG(Rd, Rn, Rm)      EMIT(ADDSUBC_gen(rex.w, 1, 1, Rm, Rn, Rd))
+#define ADCSx_REG(Rd, Rn, Rm)      FEMIT(ADDSUBC_gen(1, 0, 1, Rm, Rn, Rd))
+#define ADCSw_REG(Rd, Rn, Rm)      FEMIT(ADDSUBC_gen(0, 0, 1, Rm, Rn, Rd))
+#define ADCSxw_REG(Rd, Rn, Rm)     FEMIT(ADDSUBC_gen(rex.w, 0, 1, Rm, Rn, Rd))
+#define SBCSx_REG(Rd, Rn, Rm)      FEMIT(ADDSUBC_gen(1, 1, 1, Rm, Rn, Rd))
+#define SBCSw_REG(Rd, Rn, Rm)      FEMIT(ADDSUBC_gen(0, 1, 1, Rm, Rn, Rd))
+#define SBCSxw_REG(Rd, Rn, Rm)     FEMIT(ADDSUBC_gen(rex.w, 1, 1, Rm, Rn, Rd))
 
 // CCMP compare if cond is true, set nzcv if false
 #define CCMP_reg(sf, Rm, cond, Rn, nzcv)    ((sf)<<31 | 1<<30 | 1<<29 | 0b11010010<<21 | (Rm)<<16 | (cond)<<12 | (Rn)<<5 | (nzcv))
-#define CCMPw(Wn, Wm, nzcv, cond)   EMIT(CCMP_reg(0, Wm, cond, Wn, nzcv))
-#define CCMPx(Xn, Xm, nzcv, cond)   EMIT(CCMP_reg(1, Xm, cond, Xn, nzcv))
-#define CCMPxw(Xn, Xm, nzcv, cond)  EMIT(CCMP_reg(rex.w, Xm, cond, Xn, nzcv))
+#define CCMPw(Wn, Wm, nzcv, cond)  FEMIT(CCMP_reg(0, Wm, cond, Wn, nzcv))
+#define CCMPx(Xn, Xm, nzcv, cond)  FEMIT(CCMP_reg(1, Xm, cond, Xn, nzcv))
+#define CCMPxw(Xn, Xm, nzcv, cond) FEMIT(CCMP_reg(rex.w, Xm, cond, Xn, nzcv))
 
 // ADR
 #define ADR_gen(immlo, immhi, Rd)   ((immlo)<<29 | 0b10000<<24 | (immhi)<<5 | (Rd))
@@ -570,9 +570,9 @@ int convert_bitmask(uint64_t bitmask);
 #define ANDx_mask(Rd, Rn, N, immr, imms)    EMIT(LOGIC_gen(1, 0b00, N, immr, imms, Rn, Rd))
 #define ANDw_mask(Rd, Rn, immr, imms)       EMIT(LOGIC_gen(0, 0b00, 0, immr, imms, Rn, Rd))
 #define ANDxw_mask(Rd, Rn, N, immr, imms)   EMIT(LOGIC_gen(rex.w, 0b00, rex.w?(N):0, immr, imms, Rn, Rd))
-#define ANDSx_mask(Rd, Rn, N, immr, imms)   EMIT(LOGIC_gen(1, 0b11, N, immr, imms, Rn, Rd))
-#define ANDSw_mask(Rd, Rn, immr, imms)      EMIT(LOGIC_gen(0, 0b11, 0, immr, imms, Rn, Rd))
-#define ANDSxw_mask(Rd, Rn, N, immr, imms)  EMIT(LOGIC_gen(rex.w, 0b11, rex.w?(N):0, immr, imms, Rn, Rd))
+#define ANDSx_mask(Rd, Rn, N, immr, imms)  FEMIT(LOGIC_gen(1, 0b11, N, immr, imms, Rn, Rd))
+#define ANDSw_mask(Rd, Rn, immr, imms)     FEMIT(LOGIC_gen(0, 0b11, 0, immr, imms, Rn, Rd))
+#define ANDSxw_mask(Rd, Rn, N, immr, imms) FEMIT(LOGIC_gen(rex.w, 0b11, rex.w?(N):0, immr, imms, Rn, Rd))
 #define ORRx_mask(Rd, Rn, N, immr, imms)    EMIT(LOGIC_gen(1, 0b01, N, immr, imms, Rn, Rd))
 #define ORRw_mask(Rd, Rn, immr, imms)       EMIT(LOGIC_gen(0, 0b01, 0, immr, imms, Rn, Rd))
 #define ORRxw_mask(Rd, Rn, N, immr, imms)   EMIT(LOGIC_gen(rex.w, 0b01, rex.w?(N):0, immr, imms, Rn, Rd))
@@ -588,9 +588,9 @@ int convert_bitmask(uint64_t bitmask);
 #define ANDw_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
 #define ANDw_REG_LSR(Rd, Rn, Rm, lsr)   EMIT(LOGIC_REG_gen(0, 0b00, 0b01, 0, Rm, lsr, Rn, Rd))
 #define ANDxw_REG(Rd, Rn, Rm)           EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
-#define ANDSx_REG(Rd, Rn, Rm)           EMIT(LOGIC_REG_gen(1, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
-#define ANDSw_REG(Rd, Rn, Rm)           EMIT(LOGIC_REG_gen(0, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
-#define ANDSxw_REG(Rd, Rn, Rm)          EMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
+#define ANDSx_REG(Rd, Rn, Rm)          FEMIT(LOGIC_REG_gen(1, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
+#define ANDSw_REG(Rd, Rn, Rm)          FEMIT(LOGIC_REG_gen(0, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
+#define ANDSxw_REG(Rd, Rn, Rm)         FEMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
 #define ORRx_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, 0, Rn, Rd))
 #define ORRx_REG_LSL(Rd, Rn, Rm, lsl)   EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, lsl, Rn, Rd))
 #define ORRw_REG_LSL(Rd, Rn, Rm, lsl)   EMIT(LOGIC_REG_gen(0, 0b01, 0b00, 0, Rm, lsl, Rn, Rd))
@@ -641,10 +641,10 @@ int convert_bitmask(uint64_t bitmask);
 #define BICx(Rd, Rn, Rm)                EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 1, Rm, 0, Rn, Rd))
 #define BICw(Rd, Rn, Rm)                EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, 0, Rn, Rd))
 #define BICw_LSL(Rd, Rn, Rm, lsl)       EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, lsl, Rn, Rd))
-#define BICSx(Rd, Rn, Rm)               EMIT(LOGIC_REG_gen(1, 0b11, 0b00, 1, Rm, 0, Rn, Rd))
-#define BICSw(Rd, Rn, Rm)               EMIT(LOGIC_REG_gen(0, 0b11, 0b00, 1, Rm, 0, Rn, Rd))
-#define BICxw(Rd, Rn, Rm)               EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 1, Rm, 0, Rn, Rd))
-#define BICSxw(Rd, Rn, Rm)              EMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 1, Rm, 0, Rn, Rd))
+#define BICSx(Rd, Rn, Rm)              FEMIT(LOGIC_REG_gen(1, 0b11, 0b00, 1, Rm, 0, Rn, Rd))
+#define BICSw(Rd, Rn, Rm)              FEMIT(LOGIC_REG_gen(0, 0b11, 0b00, 1, Rm, 0, Rn, Rd))
+#define BICxw(Rd, Rn, Rm)              FEMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 1, Rm, 0, Rn, Rd))
+#define BICSxw(Rd, Rn, Rm)             FEMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 1, Rm, 0, Rn, Rd))
 #define BICx_REG    BICx
 #define BICw_REG    BICw
 #define BICxw_REG   BICxw
@@ -822,7 +822,7 @@ int convert_bitmask(uint64_t bitmask);
 // MRS : from System register
 #define MRS_nzvc(Rt)                    EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt))
 // MSR : to System register
-#define MSR_nzvc(Rt)                    EMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt))
+#define MSR_nzvc(Rt)                   FEMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt))
 // mrs    x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000    o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=0
 #define MRS_fpcr(Rt)                    EMIT(MRS_gen(1, 1, 3, 4, 4, 0, Rt))
 #define MSR_fpcr(Rt)                    EMIT(MRS_gen(0, 1, 3, 4, 4, 0, Rt))
@@ -849,6 +849,14 @@ int convert_bitmask(uint64_t bitmask);
 #define FPSR_DZC    1
 // NEON Invalid Operation Cumulative
 #define FPSR_IOC    0
+// NZCV N
+#define NZCV_N      31
+// NZCV Z
+#define NZCV_Z      30
+// NZCV C
+#define NZCV_C      29
+// NZCV V
+#define NZCV_V      28
                      
 // FCSEL
 #define FCSEL_scalar(type, Rm, cond, Rn, Rd)    (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | (cond)<<12 | 0b11<<10 | (Rn)<<5 | (Rd))
@@ -1273,10 +1281,10 @@ int convert_bitmask(uint64_t bitmask);
 
 // CMP
 #define FCMP_scalar(type, Rn, Rm, opc)  (0b11110<<24 | (type)<<22 | 1<<21 | (Rm)<<16 | 0b1000<<10 | (Rn)<<5 | (opc)<<3)
-#define FCMPS(Sn, Sm)               EMIT(FCMP_scalar(0b00, Sn, Sm, 0b00))
-#define FCMPD(Dn, Dm)               EMIT(FCMP_scalar(0b01, Dn, Dm, 0b00))
-#define FCMPS_0(Sn)                 EMIT(FCMP_scalar(0b00, Sn, 0, 0b01))
-#define FCMPD_0(Dn)                 EMIT(FCMP_scalar(0b01, Dn, 0, 0b01))
+#define FCMPS(Sn, Sm)              FEMIT(FCMP_scalar(0b00, Sn, Sm, 0b00))
+#define FCMPD(Dn, Dm)              FEMIT(FCMP_scalar(0b01, Dn, Dm, 0b00))
+#define FCMPS_0(Sn)                FEMIT(FCMP_scalar(0b00, Sn, 0, 0b01))
+#define FCMPD_0(Dn)                FEMIT(FCMP_scalar(0b01, Dn, 0, 0b01))
 
 // CVT
 #define FCVT_scalar(sf, type, rmode, opcode, Rn, Rd)    ((sf)<<31 | 0b11110<<24 | (type)<<22 | 1<<21 | (rmode)<<19 | (opcode)<<16 | (Rn)<<5 | (Rd))
@@ -2202,7 +2210,7 @@ int convert_bitmask(uint64_t bitmask);
 #define LDSETLH(Rs, Rt, Rn)             EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b011, Rn, Rt))
 #define STSETH(Rs, Rn)                  EMIT(ATOMIC_gen(0b01, 0, 0, Rs, 0b011, Rn, 0b11111))
 #define STSETLH(Rs, Rn)                 EMIT(ATOMIC_gen(0b01, 0, 1, Rs, 0b011, Rn, 0b11111))
-// Atomic Signel Max
+// Atomic Signed Max
 #define LDSMAXxw(Rs, Rt, Rn)             EMIT(ATOMIC_gen(0b10+rex.w, 0, 0, Rs, 0b100, Rn, Rt))
 #define LDSMAXAxw(Rs, Rt, Rn)            EMIT(ATOMIC_gen(0b10+rex.w, 1, 0, Rs, 0b100, Rn, Rt))
 #define LDSMAXALxw(Rs, Rt, Rn)           EMIT(ATOMIC_gen(0b10+rex.w, 1, 1, Rs, 0b100, Rn, Rt))
@@ -2318,23 +2326,23 @@ int convert_bitmask(uint64_t bitmask);
 
 // FLAGM extension
 // Invert Carry Flag
-#define CFINV()             EMIT(0b1101010100<<22 | 0b0100<<12 | 0b000<<5 | 0b11111)
+#define CFINV()                        FEMIT(0b1101010100<<22 | 0b0100<<12 | 0b000<<5 | 0b11111)
 
 #define RMIF_gen(imm6, Rn, mask)        (0b10111010000<<21 | (imm6)<<15 | 0b00001<<10 | (Rn)<<5 | (mask))
 // Rotate right reg and use as NZCV
-#define RMIF(Xn, shift, mask)           EMIT(RMIF_gen(shift, Xn, mask))
+#define RMIF(Xn, shift, mask)          FEMIT(RMIF_gen(shift, Xn, mask))
 
 #define SETF_gen(sz, Rn)                (0b00111010000<<21 | (sz)<<14 | 0b0010<<10 | (Rn)<<5 | 0b1101)
 // Set NZVc with 8bit value of reg: N=bit7, Z=[0..7]==0, V=bit8 eor bit7, C unchanged
-#define SETF8(Wn)                       EMIT(SETF_gen(0, Wn))
+#define SETF8(Wn)                      FEMIT(SETF_gen(0, Wn))
 // Set NZVc with 16bit value of reg: N=bit15, Z=[0..15]==0, V=bit16 eor bit15, C unchanged
-#define SETF16(Wn)                      EMIT(SETF_gen(1, Wn))
+#define SETF16(Wn)                     FEMIT(SETF_gen(1, Wn))
 
 // FLAGM2 extension
 // NZCV -> N=0 Z=C|V C=C&!V V=0
-#define AXFLAG()            EMIT(0b1101010100<<22 | 0b0100<<12 | 0b010<<5 | 0b11111)
+#define AXFLAG()           FEMIT(0b1101010100<<22 | 0b0100<<12 | 0b010<<5 | 0b11111)
 // NZCV -> N=!C&!Z Z=Z&C C=C|Z V=!C&Z
-#define XAFLAG()            EMIT(0b1101010100<<22 | 0b0100<<12 | 0b001<<5 | 0b11111)
+#define XAFLAG()           FEMIT(0b1101010100<<22 | 0b0100<<12 | 0b001<<5 | 0b11111)
 
 // FRINTTS extension
 #define FRINTxx_scalar(type, op, Rn, Rd)  (0b11110<<24 | (type)<<22 | 1<<21 | 0b0100<<17 | (op)<<15 | 0b10000<<10 | (Rn)<<5 | (Rd))
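
The change above is mechanical: every emitter macro that writes NZCV (ADDS/SUBS/ANDS/BICS, ADCS/SBCS, CCMP, FCMP, MSR nzvc, CFINV/RMIF/SETF8/SETF16, AXFLAG/XAFLAG) now uses FEMIT instead of EMIT. The FEMIT definitions live in the pass headers (dynarec_arm64_pass0.h and dynarec_arm64_helper.h, both touched by this patch but not shown in full here). A hypothetical sketch only, assuming the pass-0 variant just records that the instruction touches the native flags on top of the usual size accounting:

// Hypothetical sketch -- real macros are in the pass headers.
// Pass 0 collects metadata; later passes emit the actual opcodes.
#define EMIT(A)   do { dyn->insts[ninst].size += 4; dyn->native_size += 4; } while(0)
#define FEMIT(A)  do { \
        dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_TOUCH; /* NZCV writer */ \
        EMIT(A); \
    } while(0)
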
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 38a705b5..b3b2ac34 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -2162,8 +2162,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             CLZxw(gd, x1);    // x2 gets leading 0 == BSF
             MARK;
             IFX(X_ZF) {
-                CSETw(x1, cEQ);    //other flags are undefined
-                BFIw(xFlags, x1, F_ZF, 1);
+                IFNATIVE(NF_EQ) {} else {
+                    CSETw(x1, cEQ);    //other flags are undefined
+                    BFIw(xFlags, x1, F_ZF, 1);
+                }
             }
             break;
         case 0xBD:
@@ -2180,8 +2182,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             NEGxw_REG(gd, gd);   // complement
             MARK;
             IFX(X_ZF) {
-                CSETw(x1, cEQ);    //other flags are undefined
-                BFIw(xFlags, x1, F_ZF, 1);
+                IFNATIVE(NF_EQ) {} else {
+                    CSETw(x1, cEQ);    //other flags are undefined
+                    BFIw(xFlags, x1, F_ZF, 1);
+                }
             }
             break;
         case 0xBE:

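The `IFNATIVE(NF_EQ) {} else {...}` idiom above reads as: if the scheduling pass decided that this instruction's native Z flag will be consumed directly by a later instruction, emit nothing; otherwise materialize ZF into xFlags exactly as before. A plausible shape for the two new guards, assuming IFX keeps its usual definition over x64.gen_flags and that the real macros (in dynarec_arm64_helper.h) test the per-instruction need_nat_flags mask computed by markNativeFlags:

// Hypothetical sketch -- real macros are in dynarec_arm64_helper.h.
#define IFNATIVE(NF)      if(dyn->insts[ninst].need_nat_flags & (NF))
// IFXNATIVE combines the classic IFX test (x86 flag requested) with the
// native-flag availability test:
#define IFXNATIVE(X, NF)  if((dyn->insts[ninst].x64.gen_flags & (X)) \
                          && (dyn->insts[ninst].need_nat_flags & (NF)))
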
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_logic.c b/src/dynarec/arm64/dynarec_arm64_emit_logic.c
index dcdffdab..5b9a08f2 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_logic.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_logic.c
@@ -34,18 +34,33 @@ void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3,
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_CF | X_AF | X_OF) {
-        MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-        BICw(xFlags, xFlags, s3);
+        IFNATIVE(NF_VF) {
+            IFX(X_CF | X_AF) {
+                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
+                BICw(xFlags, xFlags, s3);
+            }
+        } else {
+            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+            BICw(xFlags, xFlags, s3);
+        }
     }
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s3, cEQ);
+            BFIw(xFlags, s3, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -70,18 +85,33 @@ void emit_or32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_CF | X_AF | X_OF) {
-        MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-        BICw(xFlags, xFlags, s3);
+        IFNATIVE(NF_VF) {
+            IFX(X_CF | X_AF) {
+                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
+                BICw(xFlags, xFlags, s3);
+            }
+        } else {
+            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+            BICw(xFlags, xFlags, s3);
+        }
     }
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s3, cEQ);
+            BFIw(xFlags, s3, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -101,18 +131,33 @@ void emit_xor32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_CF | X_AF | X_OF) {
-        MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-        BICw(xFlags, xFlags, s3);
+        IFNATIVE(NF_VF) {
+            IFX(X_CF | X_AF) {
+                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
+                BICw(xFlags, xFlags, s3);
+            }
+        } else {
+            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+            BICw(xFlags, xFlags, s3);
+        }
     }
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s3, cEQ);
+            BFIw(xFlags, s3, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -137,18 +182,33 @@ void emit_xor32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    IFXNATIVE(X_OF, NF_VF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_CF | X_AF | X_OF) {
-        MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-        BICw(xFlags, xFlags, s3);
+        IFNATIVE(NF_VF) {
+            IFX(X_CF | X_AF) {
+                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
+                BICw(xFlags, xFlags, s3);
+            }
+        } else {
+            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+            BICw(xFlags, xFlags, s3);
+        }
     }
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s3, cEQ);
+            BFIw(xFlags, s3, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -164,7 +224,7 @@ void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     } else IFX(X_ALL) {
         SET_DFNONE(s4);
     }
-    IFX(X_ZF) {
+    IFX(X_ZF|X_SF) {
         ANDSxw_REG(s1, s1, s2);
     } else {
         ANDxw_REG(s1, s1, s2);
@@ -173,16 +233,27 @@ void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_CF | X_AF | X_OF) {
-        MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-        BICw(xFlags, xFlags, s3);
+        IFNATIVE(NF_VF) {
+            IFX(X_CF | X_AF) {
+                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
+                BICw(xFlags, xFlags, s3);
+            }
+        } else {
+            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+            BICw(xFlags, xFlags, s3);
+        }
     }
     IFX(X_ZF) {
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s3, cEQ);
+            BFIw(xFlags, s3, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -216,16 +287,27 @@ void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_CF | X_AF | X_OF) {
-        MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-        BICw(xFlags, xFlags, s3);
+        IFNATIVE(NF_VF) {
+            IFX(X_CF | X_AF) {
+                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
+                BICw(xFlags, xFlags, s3);
+            }
+        } else {
+            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+            BICw(xFlags, xFlags, s3);
+        }
     }
     IFX(X_ZF) {
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s3, cEQ);
+            BFIw(xFlags, s3, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -355,13 +437,19 @@ void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
         BICw_REG(xFlags, xFlags, s3);
     }
-    IFX(X_ZF) {
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
-    }
-    IFX(X_SF) {
-        LSRw(s3, s1, 7);
-        BFIw(xFlags, s3, F_SF, 1);
+    if(arm64_flagm) {
+        COMP_ZFSF(s1, 8)
+    } else {
+        IFX(X_ZF) {
+            IFNATIVE(NF_EQ) {} else {
+                CSETw(s3, cEQ);
+                BFIw(xFlags, s3, F_ZF, 1);
+            }
+        }
+        IFX(X_SF) {
+            LSRw(s3, s1, 7);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -378,14 +466,14 @@ void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4
     }
     int mask = convert_bitmask_w(c);
     if(mask) {
-        IFX(X_ZF) {
+        IFX(X_ZF|X_SF) {
             ANDSw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F);
         } else {
             ANDw_mask(s1, s1, mask&0x3F, (mask>>6)&0x3F);
         }
     } else {
         MOV32w(s3, c&0xff);
-        IFX(X_ZF) {
+        IFX(X_ZF|X_SF) {
             ANDSw_REG(s1, s1, s3);
         } else {
             ANDw_REG(s1, s1, s3);
@@ -399,12 +487,16 @@ void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4
         BICw(xFlags, xFlags, s3);
     }
     IFX(X_ZF) {
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s3, cEQ);
+            BFIw(xFlags, s3, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRw(s3, s1, 7);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRw(s3, s1, 7);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -571,13 +663,19 @@ void emit_and16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
         BICw_REG(xFlags, xFlags, s3);
     }
-    IFX(X_ZF) {
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
-    }
-    IFX(X_SF) {
-        LSRw(s3, s1, 15);
-        BFIw(xFlags, s3, F_SF, 1);
+    if(arm64_flagm) {
+        COMP_ZFSF(s1, 16)
+    } else {
+        IFX(X_ZF) {
+            IFNATIVE(NF_EQ) {} else {
+                CSETw(s3, cEQ);
+                BFIw(xFlags, s3, F_ZF, 1);
+            }
+        }
+        IFX(X_SF) {
+            LSRw(s3, s1, 15);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
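
The shared pattern introduced in all of these logic emitters: a single TSTxw_REG sets NZCV once whenever ZF is requested, or SF/OF are requested and will be consumed natively; each per-flag block then either emits nothing (native consumer) or copies the bit into xFlags as before. Because AArch64 logical flag-setting ops (ANDS/TST) clear C and V, a natively consumed OF is correct for free, which is why the CF|AF|OF clearing drops the F_OF bit under IFNATIVE(NF_VF). In the best case the epilogue collapses to:

// emit_or32, best case: ZF/SF/OF all consumed natively, CF/AF/PF dead
ORRxw_REG(s1, s1, s2);  // the OR itself
TSTxw_REG(s1, s1);      // one test: N=sign, Z=zero, C=V=0 -- matching the
                        // x86 SF/ZF/OF(=0)/CF(=0) semantics of a logical op
// no CSETw/BFIw/LSRxw materialization is emitted at all
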
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_math.c b/src/dynarec/arm64/dynarec_arm64_emit_math.c
index 5623e07b..ac927c72 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_math.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_math.c
@@ -51,20 +51,26 @@ void emit_add32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         BFIxw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_CF) {
         CSETw(s4, cCS);
         BFIw(xFlags, s4, F_CF, 1);
     }
     IFX(X_OF) {
-        CSETw(s4, cVS);
-        BFIw(xFlags, s4, F_OF, 1);
+        IFNATIVE(NF_VF) {} else {
+            CSETw(s4, cVS);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIx(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIx(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -123,20 +129,26 @@ void emit_add32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
         BFIxw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_CF) {
         CSETw(s4, cCS);
         BFIw(xFlags, s4, F_CF, 1);
     }
     IFX(X_OF) {
-        CSETw(s4, cVS);
-        BFIw(xFlags, s4, F_OF, 1);
+        IFNATIVE(NF_VF) {} else {
+            CSETw(s4, cVS);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIx(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIx(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -173,8 +185,10 @@ void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         BFIx(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_CF) {
         // inverted carry
@@ -182,12 +196,16 @@ void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         BFIw(xFlags, s4, F_CF, 1);
     }
     IFX(X_OF) {
-        CSETw(s4, cVS);
-        BFIw(xFlags, s4, F_OF, 1);
+        IFNATIVE(NF_VF) {} else {
+            CSETw(s4, cVS);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIx(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIx(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -246,8 +264,10 @@ void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
         BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_CF) {
         // inverted carry
@@ -255,12 +275,16 @@ void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
         BFIw(xFlags, s4, F_CF, 1);
     }
     IFX(X_OF) {
-        CSETw(s4, cVS);
-        BFIw(xFlags, s4, F_OF, 1);
+        IFNATIVE(NF_VF) {} else {
+            CSETw(s4, cVS);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIx(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIx(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -680,7 +704,7 @@ void emit_inc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
             ANDw_mask(s4, s1, 0, 0);             // s4 = op1 & op2
         }
     }
-    IFX(X_ZF|X_OF) {
+    IFX(X_ZF|X_OF|X_SF) {
         ADDSxw_U12(s1, s1, 1);
     } else {
         ADDxw_U12(s1, s1, 1);
@@ -695,16 +719,22 @@ void emit_inc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
         BFIxw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_OF) {
-        CSETw(s4, cVS);
-        BFIw(xFlags, s4, F_OF, 1);
+        IFNATIVE(NF_VF) {} else {
+            CSETw(s4, cVS);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, rex.w?63:31);
-        BFIxw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, rex.w?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -803,7 +833,7 @@ void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
             ORRw_mask(s3, s3, 0, 0);             // s3 = ~op1 | op2
         }
     }
-    IFX(X_ZF|X_OF) {
+    IFX(X_ZF|X_OF|X_SF) {
         SUBSxw_U12(s1, s1, 1);
     } else {
         SUBxw_U12(s1, s1, 1);
@@ -818,16 +848,22 @@ void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
         BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_OF) {
-        CSETw(s4, cVS);
-        BFIw(xFlags, s4, F_OF, 1);
+        IFNATIVE(NF_VF) {} else {
+            CSETw(s4, cVS);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, rex.w?63:31);
-        BFIxw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, rex.w?63:31);
+            BFIxw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -947,7 +983,7 @@ void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     MRS_nzvc(s3);
     BFIx(s3, xFlags, 29, 1); // set C
     MSR_nzvc(s3);      // load CC into ARM CF
-    IFX(X_ZF|X_CF|X_OF) {
+    IFX(X_ZF|X_CF|X_OF|X_SF) {
         ADCSxw_REG(s1, s1, s2);
     } else {
         ADCxw_REG(s1, s1, s2);
@@ -964,20 +1000,26 @@ void emit_adc32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
     }
     IFX(X_ZF) {
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s3, cEQ);
+            BFIw(xFlags, s3, F_ZF, 1);
+        }
     }
     IFX(X_CF) {
         CSETw(s3, cCS);
         BFIw(xFlags, s3, F_CF, 1);
     }
     IFX(X_OF) {
-        CSETw(s3, cVS);
-        BFIw(xFlags, s3, F_OF, 1);
+        IFNATIVE(NF_VF) {} else {
+            CSETw(s3, cVS);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRx(s3, s1, rex.w?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRx(s3, s1, rex.w?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -1239,7 +1281,7 @@ void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_AF) {
         MVNxw_REG(s4, s1);
     }
-    IFX(X_ZF|X_CF|X_OF) {
+    IFX(X_ZF|X_CF|X_OF|X_SF) {
         SBCSxw_REG(s1, s1, s2);
     } else {
         SBCxw_REG(s1, s1, s2);
@@ -1256,8 +1298,10 @@ void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
     }
     IFX(X_ZF) {
-        CSETw(s3, cEQ);
-        BFIw(xFlags, s3, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s3, cEQ);
+            BFIw(xFlags, s3, F_ZF, 1);
+        }
     }
     IFX(X_CF) {
         // Inverted carry
@@ -1265,12 +1309,16 @@ void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         BFIw(xFlags, s3, F_CF, 1);
     }
     IFX(X_OF) {
-        CSETw(s3, cVS);
-        BFIw(xFlags, s3, F_OF, 1);
+        IFNATIVE(NF_VF) {} else {
+            CSETw(s3, cVS);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, rex.w?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, rex.w?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -1535,7 +1583,7 @@ void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
     IFX(X_AF) {
         MOVxw_REG(s3, s1);
     }
-    IFX(X_ZF|X_OF) {
+    IFX(X_ZF|X_OF|X_SF) {
         NEGSxw_REG(s1, s1);
     } else {
         NEGxw_REG(s1, s1);
@@ -1544,12 +1592,16 @@ void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_OF) {
-        CSETw(s4, cVS);
-        BFIw(xFlags, s4, F_OF, 1);
+        IFNATIVE(NF_VF) {} else {
+            CSETw(s4, cVS);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     IFX(X_AF) {
         ORRxw_REG(s3, s3, s1);                        // bc = op1 | res
@@ -1557,8 +1609,10 @@ void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
         BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, rex.w?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, rex.w?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
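
Note what is absent from the native set throughout emit_math.c: CF. AArch64 sets C to the inverse of the x86 borrow on subtraction (hence the recurring "inverted carry" CSETw with cCC here), so CF is always materialized into xFlags. A minimal standalone demonstration of the mismatch:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
    uint32_t a = 5, b = 7;
    int x86_cf = (a < b);    // x86 SUB: CF=1 when a borrow occurs
    int arm_c  = (a >= b);   // AArch64 SUBS: C=1 when NO borrow occurs
    printf("5-7: x86 CF=%d, ARM64 C=%d\n", x86_cf, arm_c); // CF=1, C=0
    return 0;
}
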
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index b7e710a7..1fbcc983 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -54,14 +54,21 @@ void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s4, s1, (rex.w)?63:31);
-        BFIx(xFlags, s4, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s4, s1, (rex.w)?63:31);
+            BFIw(xFlags, s4, F_SF, 1);
+        }
     }
     IFX(X_OF) {
         CMPSxw_U12(s2, 1);   // if s2==1
@@ -110,14 +117,21 @@ void emit_shl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s4, s1, (rex.w)?63:31);
-        BFIx(xFlags, s4, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s4, s1, (rex.w)?63:31);
+            BFIw(xFlags, s4, F_SF, 1);
+        }
     }
     IFX(X_OF) {
         if(c==1) {
@@ -171,14 +185,21 @@ void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s4, s1, (rex.w)?63:31);
-        BFIx(xFlags, s4, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s4, s1, (rex.w)?63:31);
+            BFIx(xFlags, s4, F_SF, 1);
+        }
     }
     if(box64_dynarec_test)
         IFX(X_AF) {
@@ -220,14 +241,21 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        // no sign if c>0
-        BFCw(xFlags, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            // no sign if c>0
+            BFCw(xFlags, F_SF, 1);
+        }
     }
     if(box64_dynarec_test)
         IFX(X_AF) {
@@ -259,14 +287,21 @@ void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s4, s1, (rex.w)?63:31);
-        BFIx(xFlags, s4, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s4, s1, (rex.w)?63:31);
+            BFIx(xFlags, s4, F_SF, 1);
+        }
     }
     IFX(X_OF)
         if(c==1) {
@@ -365,7 +400,11 @@ void emit_shl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
             LSLw(s3, s1, c-1);
             BFXILw(xFlags, s3, 7, 1);   // insert F_CF from s3[7:1]
         }
-        MOVw_REG(s1, xZR);
+        IFXNATIVE(X_ZF|X_SF, NF_EQ|NF_SF) {
+            SUBSw_REG(s1, s1, s1);
+        } else {
+            MOVw_REG(s1, xZR);
+        }
         IFX(X_PEND) {
             STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
         }
@@ -373,15 +412,17 @@ void emit_shl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
             BFCw(xFlags, F_OF, 1);
         }
         IFX(X_SF) {
-            BFCw(xFlags, F_SF, 1);
+            IFNATIVE(NF_SF) {} else BFCw(xFlags, F_SF, 1);
         }
         if(box64_dynarec_test) IFX(X_AF) {
             BFCw(xFlags, F_AF, 1);
         }
         IFX(X_PF | X_ZF) {
-            MOV32w(s3, 1);
-            IFX(X_ZF) {
-                BFIw(xFlags, s3, F_ZF, 1);
+            IFNATIVE(NF_EQ) {IFX(X_PF) {MOV32w(s3, 1);}} else {
+                MOV32w(s3, 1);
+                IFX(X_ZF) {
+                    BFIw(xFlags, s3, F_ZF, 1);
+                }
             }
             IFX(X_PF) {
                 BFIw(xFlags, s3, F_PF, 1);
@@ -490,8 +531,8 @@ void emit_sar8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     }
     COMP_ZFSF(s1, 8)
     IFX(X_OF) {
-        CMPSw_U12(s2, 1);
-        Bcond(cNE, 4+4);
+        SUBw_U12(s4, s2, 1);
+        CBNZw(s4, 4+4);
             BFCw(xFlags, F_OF, 1);
     }
     if(box64_dynarec_test)
@@ -632,7 +673,11 @@ void emit_shl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
             LSLw(s3, s1, c-1);
             BFXILw(xFlags, s3, 15, 1);   // insert F_CF from s3[15:1]
         }
-        MOVw_REG(s1, xZR);
+        IFXNATIVE(X_ZF|X_SF, NF_EQ|NF_SF) {
+            SUBSw_REG(s1, s1, s1);
+        } else {
+            MOVw_REG(s1, xZR);
+        }
         IFX(X_PEND) {
             STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
         }
@@ -640,12 +685,17 @@ void emit_shl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
             BFCw(xFlags, F_OF, 1);
         }
         IFX(X_SF) {
-            BFCw(xFlags, F_SF, 1);
+            IFNATIVE(NF_SF) {} else BFCw(xFlags, F_SF, 1);
+        }
+        if(box64_dynarec_test) IFX(X_AF) {
+            BFCw(xFlags, F_AF, 1);
         }
         IFX(X_PF | X_ZF) {
-            MOV32w(s3, 1);
-            IFX(X_ZF) {
-                BFIw(xFlags, s3, F_ZF, 1);
+            IFNATIVE(NF_EQ) {IFX(X_PF) {MOV32w(s3, 1);}} else {
+                MOV32w(s3, 1);
+                IFX(X_ZF) {
+                    BFIw(xFlags, s3, F_ZF, 1);
+                }
             }
             IFX(X_PF) {
                 BFIw(xFlags, s3, F_PF, 1);
@@ -670,8 +720,8 @@ void emit_shr16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         BFIw(xFlags, s4, 0, 1);
     }
     IFX(X_OF) {
-        CMPSw_U12(s2, 1);   // if s2==1
-            Bcond(cNE, 4+2*4);
+        SUBw_U12(s4, s2, 1);   // if s2==1
+        CBNZw(s4, 4+2*4);
             LSRw(s4, s1, 15);
             BFIw(xFlags, s4, F_OF, 1);
     }
@@ -751,8 +801,8 @@ void emit_sar16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     }
     COMP_ZFSF(s1, 16)
     IFX(X_OF) {
-        CMPSw_U12(s2, 1);
-        Bcond(cNE, 4+4);
+        SUBw_U12(s4, s2, 1);
+        CBNZw(s4, 4+4);
             BFCw(xFlags, F_OF, 1);
     }
     if(box64_dynarec_test)
@@ -1154,14 +1204,21 @@ void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s4, s1, (rex.w)?63:31);
-        BFIx(xFlags, s4, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s4, s1, (rex.w)?63:31);
+            BFIx(xFlags, s4, F_SF, 1);
+        }
     }
     IFX(X_OF) {
         if(c==1) {
@@ -1257,18 +1314,25 @@ void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s4, s1, (rex.w)?63:31);
-        BFIw(xFlags, s4, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s4, s1, (rex.w)?63:31);
+            BFIx(xFlags, s4, F_SF, 1);
+        }
     }
     IFX(X_OF) {
-        CMPSw_U12(s5, 1);
-        Bcond(cNE, 4+2*4);
+        SUBw_U12(s5, s5, 1);
+        CBNZw(s5, 4+2*4);    //flagless jump
             UBFXx(s3, s1, rex.w?63:31, 1);
             EORw_REG_LSL(xFlags, xFlags, s3, F_OF);  // OF is set if sign changed
     }
@@ -1303,18 +1367,25 @@ void emit_shld32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    int need_tst = 0;
+    IFX(X_ZF) need_tst = 1;
+    IFXNATIVE(X_SF, NF_SF) need_tst = 1;
+    if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_ZF) {
-        TSTxw_REG(s1, s1);
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s4, s1, (rex.w)?63:31);
-        BFIx(xFlags, s4, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s4, s1, (rex.w)?63:31);
+            BFIx(xFlags, s4, F_SF, 1);
+        }
     }
     IFX(X_OF) {
-        CMPSw_U12(s5, 1);
-        Bcond(cNE, 4+2*4);
+        SUBw_U12(s5, s5, 1);
+        CBNZw(s5, 4+2*4);    //flagless jump
             UBFXx(s3, s1, rex.w?63:31, 1);
             EORw_REG_LSL(xFlags, xFlags, s3, F_OF);  // OF is set if sign changed
     }
@@ -1394,8 +1465,8 @@ void emit_shrd16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3,
     }
     COMP_ZFSF(s1, 16)
     IFX(X_OF) {
-        CMPSw_U12(s5, 1);
-        Bcond(cNE, 4+2*4);
+        SUBw_U12(s5, s5, 1);
+        CBNZw(s5, 4+2*4);
             UBFXw(s3, s1, 15, 1);
             EORw_REG_LSL(xFlags, xFlags, s3, F_OF);  // OF is set if sign changed
     }
@@ -1485,8 +1556,8 @@ void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3,
     }
     COMP_ZFSF(s1, 16)
     IFX(X_OF) {
-        CMPSw_U12(s5, 1);
-        Bcond(cNE, 4+2*4);
+        SUBw_U12(s5, s5, 1);
+        CBNZw(s5, 4+2*4);
             UBFXw(s3, s1, 15, 1);
             EORw_REG_LSL(xFlags, xFlags, s3, F_OF);  // OF is set if sign changed
     }
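
The other recurring change in emit_shift.c: the count==1 special cases for OF used CMPSw_U12 plus Bcond, but CMP is just SUBS into the zero register and would clobber the NZCV state that native-flag consumers now depend on. The replacement performs the same conditional skip without touching PSTATE:

// Before: the comparison overwrites NZCV right after the flags were produced
CMPSw_U12(s2, 1);      // subs wzr, ws2, #1 -- sets NZCV
Bcond(cNE, 4+2*4);     // conditional branch reads those flags

// After: same control flow, NZCV untouched
SUBw_U12(s4, s2, 1);   // plain sub into scratch s4, no flag update
CBNZw(s4, 4+2*4);      // branch on register value, flag-neutral
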
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_tests.c b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
index 6a78ce8e..51c9f1bf 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_tests.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
@@ -47,8 +47,10 @@ void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         BFIx(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_CF) {
         // inverted carry
@@ -56,12 +58,16 @@ void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         BFIw(xFlags, s4, F_CF, 1);
     }
     IFX(X_OF) {
-        CSETw(s4, cVS);
-        BFIw(xFlags, s4, F_OF, 1);
+        IFNATIVE(NF_VF) {} else {
+            CSETw(s4, cVS);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s5, (rex.w)?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s5, (rex.w)?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s5, s4);
@@ -84,16 +90,27 @@ void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int
     // and now the tricky ones (and mostly unused), PF and AF
     // bc = (res & (~d | s)) | (~d & s) => is 0 here...
     IFX(X_OF|X_AF|X_CF) {
-        MOV32w(s4, (1<<F_OF)|(1<<F_AF)|(1<<F_CF));
-        BICw(xFlags, xFlags, s4);
+        IFXNATIVE(X_OF, NF_VF) {
+            IFX(X_AF|X_CF) {
+                MOV32w(s4, (1<<F_CF)|(1<<F_AF));
+                BICw(xFlags, xFlags, s4);
+            }
+        } else {
+            MOV32w(s4, (1<<F_OF)|(1<<F_AF)|(1<<F_CF));
+            BICw(xFlags, xFlags, s4);
+        }
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s3, s1, (rex.w)?63:31);
-        BFIw(xFlags, s3, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s3, s1, (rex.w)?63:31);
+            BFIw(xFlags, s3, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -237,20 +254,31 @@ void emit_test32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         SET_DFNONE(s4);
     }
     IFX(X_CF | X_AF | X_OF) {
-        MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
-        BICw(xFlags, xFlags, s3);
+        IFXNATIVE(X_OF, NF_VF) {
+            IFX(X_AF|X_CF) {
+                MOV32w(s3, (1<<F_CF)|(1<<F_AF));
+                BICw(xFlags, xFlags, s3);
+            }
+        } else {
+            MOV32w(s3, (1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+            BICw(xFlags, xFlags, s3);
+        }
     }
     ANDSxw_REG(s3, s1, s2);   // res = s1 & s2
     IFX_PENDOR0 {
         STRxw_U12(s3, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
-        LSRxw(s4, s3, rex.w?63:31);
-        BFIw(xFlags, s4, F_SF, 1);
+        IFNATIVE(NF_SF) {} else {
+            LSRxw(s4, s3, rex.w?63:31);
+            BFIw(xFlags, s4, F_SF, 1);
+        }
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s3, s5);
@@ -275,8 +303,10 @@ void emit_test16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4,
         STRH_U12(s5, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
         LSRw(s4, s5, 15);
@@ -305,8 +335,10 @@ void emit_test8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
         STRB_U12(s5, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_ZF) {
-        CSETw(s4, cEQ);
-        BFIw(xFlags, s4, F_ZF, 1);
+        IFNATIVE(NF_EQ) {} else {
+            CSETw(s4, cEQ);
+            BFIw(xFlags, s4, F_ZF, 1);
+        }
     }
     IFX(X_SF) {
         LSRw(s4, s5, 7);
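
Next comes the scheduling pass itself, in dynarec_arm64_functions.c. markNativeFlags walks backwards from each flag consumer, through all predecessors, until it reaches the NZCV producer (an instruction marked NAT_FLAG_OP_TOUCH), accumulating the needed native flags along the way; if any path crosses an instruction whose native flags are unusable or canceled, the chain yields 0 and unmarkNativeFlags reverts it. A hypothetical straight-line trace shows the payoff:

// Hypothetical x86 trace:
//   sub eax, ebx   -> emitted as SUBSxw_REG (FEMIT): NAT_FLAG_OP_TOUCH,
//                     set_nat_flags = NF_EQ|NF_SF|NF_VF
//   jz  target     -> consumes ZF: markNativeFlags() walks back, finds the
//                     SUBS, ORs NF_EQ into its need_nat_flags
// Result: the SUBS emitter sees IFNATIVE(NF_EQ) succeed and skips the
// CSETw/BFIw pair; the jz then tests the ARM64 Z flag directly.
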
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.c b/src/dynarec/arm64/dynarec_arm64_functions.c
index 8a17365f..ae36cd9b 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.c
+++ b/src/dynarec/arm64/dynarec_arm64_functions.c
@@ -678,6 +678,14 @@ void inst_name_pass3(dynarec_native_t* dyn, int ninst, const char* name, rex_t r
             dyn->insts[ninst].x64.need_before,
             dyn->insts[ninst].x64.need_after,
             dyn->smwrite, dyn->insts[ninst].will_write, dyn->insts[ninst].last_write);
+        if(dyn->insts[ninst].nat_flags_op) {
+            if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && dyn->insts[ninst].before_nat_flags)
+                printf_log(LOG_NONE, " NF:%d/read:%x", dyn->insts[ninst].nat_flags_op, dyn->insts[ninst].before_nat_flags);
+            else
+                printf_log(LOG_NONE, " NF:%d", dyn->insts[ninst].nat_flags_op);
+        }
+        if(dyn->insts[ninst].use_nat_flags || dyn->insts[ninst].set_nat_flags || dyn->insts[ninst].need_nat_flags)
+            printf_log(LOG_NONE, " nf:%hhx/%hhx/%hhx", dyn->insts[ninst].set_nat_flags, dyn->insts[ninst].use_nat_flags, dyn->insts[ninst].need_nat_flags);
         if(dyn->insts[ninst].pred_sz) {
             dynarec_log(LOG_NONE, ", pred=");
             for(int ii=0; ii<dyn->insts[ninst].pred_sz; ++ii)
@@ -796,3 +804,196 @@ int fpu_is_st_freed(dynarec_native_t* dyn, int ninst, int st)
 {
     return (dyn->n.tags&(0b11<<(st*2)))?1:0;
 }
+
+
+uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag)
+{
+    if(dyn->insts[ninst].x64.set_flags) {
+        dyn->insts[ninst].set_nat_flags |= flag;
+        if(dyn->insts[ninst].x64.use_flags) {
+            dyn->insts[ninst].use_nat_flags |= flag;
+        }
+    } else {
+        dyn->insts[ninst].use_nat_flags |= flag;
+    }
+    return flag;
+}
+
+uint8_t flag2native(uint8_t flags)
+{
+    uint8_t ret = 0;
+    #ifdef ARM64
+    if(flags&X_ZF) ret|=NF_EQ;
+    if(flags&X_SF) ret|=NF_SF;
+    if(flags&X_OF) ret|=NF_VF;
+    #else
+    // no native flags on rv64 or la64
+    #endif
+    return ret;
+}
+
+static int markNativeFlags(dynarec_native_t* dyn, int ninst, uint8_t flags, int start)
+{
+    while(ninst>=0) {
+//printf_log(LOG_INFO, "markNativeFlags ninst=%d, flags=%x, start=%d, nat_flags_op=%d, need_nat_flag=%x, flag_gen=%x need_before=%x need_after=%x\n", ninst, flags, start, dyn->insts[ninst].nat_flags_op, dyn->insts[ninst].need_nat_flags, dyn->insts[ninst].x64.gen_flags, flag2native(dyn->insts[ninst].x64.need_before), flag2native(dyn->insts[ninst].x64.need_after));
+        // propagation already done
+        uint8_t flag_entry = (start && dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)?dyn->insts[ninst].before_nat_flags:dyn->insts[ninst].need_nat_flags;
+        if((flag_entry&flags)==flags) return flag_entry;
+        // no more flag propagation
+        if(!start && !flag2native(dyn->insts[ninst].x64.need_after)) return flags;
+        // flags destroyed, cancel native flags
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_UNUSABLE) return 0;
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_CANCELED) return 0;
+        if(start) {
+            start = 0;
+            flags |= flag2native(dyn->insts[ninst].x64.need_before);
+        } else if(dyn->insts[ninst].x64.gen_flags && (flag2native(dyn->insts[ninst].x64.gen_flags)&flags)) {
+            // this is the emitter of the native flags! so, is it good or not?
+            if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && (dyn->insts[ninst].set_nat_flags&flags)==flags) {
+                dyn->insts[ninst].need_nat_flags |= flags;
+                if(!dyn->insts[ninst].x64.may_set)  // if flags just may be set, continue!
+                    return flags;
+            } else
+                return 0;
+        }
+        if(dyn->insts[ninst].use_nat_flags)
+            flags |= dyn->insts[ninst].use_nat_flags;
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)   // can happen on an operation that reads and generates flags
+            dyn->insts[ninst].before_nat_flags |= flags;
+        else
+            dyn->insts[ninst].need_nat_flags |= flags;
+        flags |= flag2native(dyn->insts[ninst].x64.need_before);
+        if(!dyn->insts[ninst].pred_sz)
+            return 0;
+        for(int i=1; i<dyn->insts[ninst].pred_sz; ++i) {
+            int ret_flags = markNativeFlags(dyn, dyn->insts[ninst].pred[i], flags, 0);
+            if(!ret_flags)
+                return 0;
+            flags|=ret_flags;
+        }
+        ninst = dyn->insts[ninst].pred[0];
+    }
+    return 0;
+}
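
markNativeFlags walks backward from a flag consumer through the pred[] lists toward the instruction that generated the flags, accumulating the native mask as it goes; any UNUSABLE or CANCELED op on a path vetoes the whole chain. Below is a deliberately tiny model of that backward walk over a linear block, using hypothetical structures and no pred[] recursion or TOUCH/may_set handling, just to make the propagation visible:

    #include <stdint.h>
    #include <stdio.h>

    #define NF_EQ_LIKE 1                            /* stands in for NF_EQ */
    enum { OP_NONE, OP_TOUCH, OP_UNUSABLE };        /* simplified nat_flags_op */
    typedef struct { int op; uint8_t set, need; } inst_t;

    /* Return the mask if the path from producer to 'use' keeps NZCV
       intact, 0 otherwise; mark 'need' along the way. */
    static uint8_t mark(inst_t* insts, int use, uint8_t flags)
    {
        for (int i = use; i >= 0; --i) {
            if (insts[i].op == OP_UNUSABLE) return 0;   /* a call clobbered NZCV */
            if (insts[i].op == OP_TOUCH && (insts[i].set & flags) == flags)
                return insts[i].need |= flags;          /* reached the producer  */
            insts[i].need |= flags;                     /* flags live across i   */
        }
        return 0;                                       /* no producer found     */
    }

    int main(void)
    {
        inst_t block[] = {
            { OP_TOUCH, NF_EQ_LIKE, 0 },    /* 0: TEST, sets Z natively  */
            { OP_NONE,  0,          0 },    /* 1: MOV, leaves NZCV alone */
            { OP_NONE,  0,          0 },    /* 2: Jcc, consumes Z        */
        };
        printf("marked: %x\n", mark(block, 2, NF_EQ_LIKE)); /* prints 1: viable */
        return 0;
    }
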
+
+static void unmarkNativeFlags(dynarec_native_t* dyn, int ninst, int start)
+{
+//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, will check forward the real start\n", ninst);
+    // need to check if the branch also goes forward, to really start from the beginning
+    while((ninst<dyn->size) && dyn->insts[ninst].x64.has_next && !dyn->insts[ninst].nat_flags_op && dyn->insts[ninst+1].before_nat_flags)
+        ninst++;
+
+    while(ninst>=0) {
+//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, start=%d\n", ninst, start);
+        // no more flag propagation
+        if(!start && !flag2native(dyn->insts[ninst].x64.need_after)) return;
+        // flags destroyed, but they may still be used
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_UNUSABLE) return;
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_CANCELED) return;
+        if(start)
+            start = 0;
+        else if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH) {
+            if(!dyn->insts[ninst].x64.may_set) {
+                dyn->insts[ninst].need_nat_flags = 0;
+                dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_CANCELED;
+                return;
+            }
+        }
+        dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_CANCELED;
+        #if 0
+        // check forward
+        if(dyn->insts[ninst].x64.has_next && dyn->insts[ninst+1].need_nat_flags)
+            unmarkNativeFlags(dyn, ninst+1, 1);
+        if(dyn->insts[ninst].x64.jmp && dyn->insts[ninst].x64.jmp_insts!=-1) {
+            int jmp = dyn->insts[ninst].x64.jmp_insts;
+            if(dyn->insts[jmp].need_nat_flags)
+                unmarkNativeFlags(dyn, jmp, 1);
+        }
+        #endif
+        // check if stop
+        if(((dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)?dyn->insts[ninst].before_nat_flags:dyn->insts[ninst].need_nat_flags)==0)
+            return;
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH)   // can happen on an operation that reads and generates flags
+            dyn->insts[ninst].before_nat_flags = 0;
+        else
+            dyn->insts[ninst].need_nat_flags = 0;
+        if(!flag2native(dyn->insts[ninst].x64.need_before)) return;
+        if(!dyn->insts[ninst].pred_sz)
+            return;
+        for(int i=1; i<dyn->insts[ninst].pred_sz; ++i)
+            unmarkNativeFlags(dyn, dyn->insts[ninst].pred[i], 0);
+        if(!dyn->insts[ninst].x64.has_next)
+            return;
+        ninst = dyn->insts[ninst].pred[0];
+    }
+}
+
+static void propagateNativeFlags(dynarec_native_t* dyn, int ninst)
+{
+    uint8_t flags = dyn->insts[ninst].use_nat_flags&flag2native(dyn->insts[ninst].x64.need_before);
+    uint8_t flags_after = flag2native(dyn->insts[ninst].x64.need_after);
+    int marked_flags = markNativeFlags(dyn, ninst, flags, 1);
+    if(!marked_flags) {
+//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, because marked_flags is 0\n", ninst);
+        unmarkNativeFlags(dyn, ninst, 1);
+        return;
+    }
+    uint8_t need_flags;
+    // check that all following insts carry the correct flags, and catch uses of x86 flags while native ones are live
+    if(dyn->insts[ninst].x64.has_next && (flags_after&marked_flags)) {
+        need_flags = dyn->insts[ninst+1].nat_flags_op?dyn->insts[ninst+1].before_nat_flags:dyn->insts[ninst+1].need_nat_flags;    // native flags used
+        flags_after = flag2native(dyn->insts[ninst+1].x64.need_before)&~need_flags; // flags that need to stay in x86 form
+        if((need_flags&~marked_flags) || (!need_flags && (flags_after&marked_flags))) {
+//printf_log(LOG_INFO, "unmarkNativeFlags ninst=%d, because: need_flags=%hhx, flag_after=%hhx, marked_flags=%hhx\n", ninst, need_flags, flags_after, marked_flags);
+            unmarkNativeFlags(dyn, ninst, 1);
+            return;
+        }
+    }
+    #if 0
+    // check at jump point, as native flags are not converted
+    int jmp = dyn->insts[ninst].x64.jmp_insts;
+    if(dyn->insts[ninst].x64.jmp && jmp!=-1) {
+        need_flags = dyn->insts[jmp].need_nat_flags;
+        flags_after = flag2native(dyn->insts[jmp].x64.need_before);
+        if(((need_flags&flags_after)!=need_flags) || (!need_flags && (flags_after&marked_flags))) {
+            unmarkNativeFlags(dyn, ninst, 1);
+            return;
+        }
+    }
+    #endif
+}
+
+void updateNatveFlags(dynarec_native_t* dyn)
+{
+    // backward check if native flags are used
+    for(int ninst=dyn->size-1; ninst>=0; --ninst)
+        if(dyn->insts[ninst].use_nat_flags) {
+            propagateNativeFlags(dyn, ninst);
+        }
+}
+
+void rasNativeState(dynarec_arm_t* dyn, int ninst)
+{
+    dyn->insts[ninst].nat_flags_op = dyn->insts[ninst].set_nat_flags = dyn->insts[ninst].use_nat_flags = dyn->insts[ninst].need_nat_flags = 0;
+}
+
+int nativeFlagsNeedsTransform(dynarec_arm_t* dyn, int ninst)
+{
+    int jmp = dyn->insts[ninst].x64.jmp_insts;
+    if(jmp<0)
+        return 0;
+    if(!dyn->insts[ninst].x64.need_after || !dyn->insts[jmp].x64.need_before)
+        return 0;
+    if(dyn->insts[ninst].set_nat_flags)
+        return 0;
+    uint8_t flags_before = dyn->insts[ninst].need_nat_flags;
+    uint8_t flags_after = dyn->insts[jmp].need_nat_flags;
+    if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH)
+        flags_after = dyn->insts[jmp].before_nat_flags;
+    uint8_t flags_x86 = flag2native(dyn->insts[jmp].x64.need_before);
+    flags_x86 &= ~flags_after;
+    // all flags_after should be present, and none of the remaining flags_x86
+    if(((flags_before&flags_after)!=flags_after) || (flags_before&flags_x86))
+        return 1;
+    return 0;
+}
\ No newline at end of file
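
nativeFlagsNeedsTransform reduces to pure mask algebra: a jump needs a fix-up either when the target expects a native flag the source is not carrying, or when the source still holds natively a flag the target expects back in xFlags. The same predicate on bare masks, worked through on two hypothetical cases (NF_EQ=1, NF_SF=2, NF_VF=4 as in the private header):

    #include <stdint.h>
    #include <stdio.h>

    #define NF_EQ (1<<0)
    #define NF_SF (1<<1)
    #define NF_VF (1<<2)

    /* before = native flags live at the jump site
       after  = native flags the target expects in NZCV
       x86    = flags the target expects in x86 form (from flag2native) */
    static int needs_transform(uint8_t before, uint8_t after, uint8_t x86)
    {
        x86 &= ~after;
        return ((before & after) != after) || (before & x86);
    }

    int main(void)
    {
        /* both sides keep Z in NZCV: nothing to do */
        printf("%d\n", needs_transform(NF_EQ, NF_EQ, NF_EQ));             /* 0 */
        /* source holds S natively, target wants it in xFlags:
           S must be materialized before the jump */
        printf("%d\n", needs_transform(NF_EQ|NF_SF, NF_EQ, NF_EQ|NF_SF)); /* 1 */
        return 0;
    }
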
diff --git a/src/dynarec/arm64/dynarec_arm64_functions.h b/src/dynarec/arm64/dynarec_arm64_functions.h
index e87da74c..446c1cb1 100644
--- a/src/dynarec/arm64/dynarec_arm64_functions.h
+++ b/src/dynarec/arm64/dynarec_arm64_functions.h
@@ -51,6 +51,16 @@ int neoncache_combine_st(dynarec_arm_t* dyn, int ninst, int a, int b);  // with
 // Do not allow i64 type
 int neoncache_no_i64(dynarec_arm_t* dyn, int ninst, int st, int a);
 
+// transform x86 flags to native flags
+uint8_t flag2native(uint8_t flags);
+// mark an instruction as using/generating native flags; returns the flag
+uint8_t mark_natflag(dynarec_arm_t* dyn, int ninst, uint8_t flag);
+// propagate the use of native flags, or cancel it (done between step 0 and step 1)
+void updateNatveFlags(dynarec_arm_t* dyn);
+// reset (raz) the arm-specific state when an opcode is unused
+void rasNativeState(dynarec_arm_t* dyn, int ninst);
+// check if native flags need some transform to/from x86 flags
+int nativeFlagsNeedsTransform(dynarec_arm_t* dyn, int ninst);
 
 // FPU Cache transformation (for loops) // Specific, need to be written by backend
 int fpuCacheNeedsTransform(dynarec_arm_t* dyn, int ninst);
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.c b/src/dynarec/arm64/dynarec_arm64_helper.c
index f4497b61..c6493015 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.c
+++ b/src/dynarec/arm64/dynarec_arm64_helper.c
@@ -754,6 +754,9 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst, int is32bits, int is64bits)
 void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int savereg)
 {
     MAYUSE(fnc);
+    #if STEP == 0
+    dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE;
+    #endif
     if(savereg==0)
         savereg = 7;
     if(saveflags) {
@@ -799,6 +802,9 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
 void call_n(dynarec_arm_t* dyn, int ninst, void* fnc, int w)
 {
     MAYUSE(fnc);
+    #if STEP == 0
+    dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_UNUSABLE;
+    #endif
     STRx_U12(xFlags, xEmu, offsetof(x64emu_t, eflags));
     fpu_pushcache(dyn, ninst, x3, 1);
     // x9..x15, x16,x17,x18 those needs to be saved by caller
@@ -2380,11 +2386,63 @@ static void flagsCacheTransform(dynarec_arm_t* dyn, int ninst, int s1)
     }
 }
 
+static void nativeFlagsTransform(dynarec_arm_t* dyn, int ninst, int s1, int s2)
+{
+    int j64;
+    int jmp = dyn->insts[ninst].x64.jmp_insts;
+    if(jmp<0)
+        return;
+    uint8_t flags_before = dyn->insts[ninst].need_nat_flags;
+    uint8_t flags_after = dyn->insts[jmp].need_nat_flags;
+    if(dyn->insts[jmp].nat_flags_op==NAT_FLAG_OP_TOUCH)
+        flags_after = dyn->insts[jmp].before_nat_flags;
+    uint8_t flags_x86 = flag2native(dyn->insts[jmp].x64.need_before);
+    flags_x86 &= ~flags_after;
+    MESSAGE(LOG_DUMP, "\tFNative flags transform ---- ninst=%d -> %d %hhx -> %hhx/%hhx\n", ninst, jmp, flags_before, flags_after, flags_x86);
+    // flags present in before and missing in after
+    if((flags_before&NF_EQ) && (flags_x86&NF_EQ)) {
+        CSETw(s1, cEQ);
+        BFIw(xFlags, s1, F_ZF, 1);
+    }
+    if((flags_before&NF_SF) && (flags_x86&NF_SF)) {
+        CSETw(s1, cMI);
+        BFIw(xFlags, s1, F_SF, 1);
+    }
+    if((flags_before&NF_VF) && (flags_x86&NF_VF)) {
+        CSETw(s1, cVS);
+        BFIw(xFlags, s1, F_OF, 1);
+    }
+    // flags missing and needed later
+    int mrs = 0;
+    #define GO_MRS(A)   if(!mrs) {mrs=1; MRS_nzvc(A); }
+    if(!(flags_before&NF_EQ) && (flags_after&NF_EQ)) {
+        GO_MRS(s2);
+        BFIw(s1, xFlags, F_ZF, 1);
+        BFIx(s2, s1, NZCV_Z, 1);
+    }
+    if(!(flags_before&NF_SF) && (flags_after&NF_SF)) {
+        GO_MRS(s2);
+        BFIw(s1, xFlags, F_SF, 1);
+        BFIx(s2, s1, NZCV_N, 1);
+    }
+    if(!(flags_before&NF_VF) && (flags_after&NF_VF)) {
+        GO_MRS(s2);
+        BFIw(s1, xFlags, F_OF, 1);
+        BFIx(s2, s1, NZCV_V, 1);
+    }
+    #undef GO_MRS
+    if(mrs) MSR_nzvc(s2);
+
+    MESSAGE(LOG_DUMP, "\tF---- Native flags transform\n");
+}
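
The GO_MRS/MSR_nzvc pair above is a read-modify-write of the AArch64 NZCV system register: flags the target expects natively, but that currently sit in xFlags, are injected bit by bit (N=31, Z=30, C=29, V=28, assuming the usual NZCV_* positions from the emitter header). A standalone illustration of the same read-modify-write; this is not box64 code, and it assumes an AArch64 target with GCC/Clang inline asm:

    #include <stdint.h>

    #define NZCV_Z 30   /* bit position of Z in NZCV (assumed) */

    /* Force the host Z flag to zf (0 or 1), leaving N, C and V untouched. */
    static inline void set_host_zf(int zf)
    {
        uint64_t nzcv;
        __asm__ volatile("mrs %0, nzcv" : "=r"(nzcv));          /* MRS_nzvc */
        nzcv = (nzcv & ~(1ULL << NZCV_Z)) | ((uint64_t)(zf & 1) << NZCV_Z);
        __asm__ volatile("msr nzcv, %0" : : "r"(nzcv) : "cc");  /* MSR_nzvc */
    }
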
+
 void CacheTransform(dynarec_arm_t* dyn, int ninst, int cacheupd, int s1, int s2, int s3) {
     if(cacheupd&1)
         flagsCacheTransform(dyn, ninst, s1);
     if(cacheupd&2)
         fpuCacheTransform(dyn, ninst, s1, s2, s3);
+    if(cacheupd&4)
+        nativeFlagsTransform(dyn, ninst, s1, s2);
 }
 
 void fpu_reflectcache(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3)
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index d3451182..47c3b806 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -32,6 +32,10 @@
 #define PK64(a)   *(uint64_t*)(addr+a)
 #define PKip(a)   *(uint8_t*)(ip+a)
 
+#ifndef FEMIT
+#define FEMIT(A)    EMIT(A)
+#endif
+
 // Strong mem emulation helpers
 #define SMREAD_VAL  4
 #define SMWRITE2_MIN 1
@@ -884,12 +888,20 @@
     j64 = GETMARKLOCK-(dyn->native_size);  \
     CBNZx(reg, j64)
 
+#ifndef IFNATIVE
+#define IFNATIVE(A)     if(dyn->insts[ninst].need_nat_flags&(A))
+#define IFNATIVEN(A)    if((dyn->insts[ninst].need_nat_flags&(A))==(A))
+#endif
+
+#ifndef IFX
 #define IFX(A)  if((dyn->insts[ninst].x64.gen_flags&(A)))
 #define IFX2(A, B)  if((dyn->insts[ninst].x64.gen_flags&(A)) B)
 #define IFX_PENDOR0  if((dyn->insts[ninst].x64.gen_flags&(X_PEND) || !dyn->insts[ninst].x64.gen_flags))
 #define IFXX(A) if((dyn->insts[ninst].x64.gen_flags==(A)))
 #define IFX2X(A, B) if((dyn->insts[ninst].x64.gen_flags==(A) || dyn->insts[ninst].x64.gen_flags==(B) || dyn->insts[ninst].x64.gen_flags==((A)|(B))))
 #define IFXN(A, B)  if((dyn->insts[ninst].x64.gen_flags&(A) && !(dyn->insts[ninst].x64.gen_flags&(B))))
+#define IFXNATIVE(X, N) if((dyn->insts[ninst].x64.gen_flags&(X)) && (dyn->insts[ninst].need_nat_flags&(N)))
+#endif
 
 // Generate FCOM with s1 and s2 scratch regs (the VCMP is already done)
 #define FCOM(s1, s2, s3)                                                    \
@@ -1660,13 +1672,21 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
 #define GOCOND(B, T1, T2)                                   \
     case B+0x0:                                             \
         INST_NAME(T1 "O " T2);                              \
+        IFNATIVE(NF_VF) {                                   \
+        GO( , cVC, cVS, X_OF)                               \
+        } else {                                            \
         GO( TSTw_mask(xFlags, 0b010101, 0)                  \
             , cEQ, cNE, X_OF)                               \
+        }                                                   \
         break;                                              \
     case B+0x1:                                             \
         INST_NAME(T1 "NO " T2);                             \
+        IFNATIVE(NF_VF) {                                   \
+        GO( , cVS, cVC, X_OF)                               \
+        } else {                                            \
         GO( TSTw_mask(xFlags, 0b010101, 0)                  \
             , cNE, cEQ, X_OF)                               \
+        }                                                   \
         break;                                              \
     case B+0x2:                                             \
         INST_NAME(T1 "C " T2);                              \
@@ -1680,13 +1700,21 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
         break;                                              \
     case B+0x4:                                             \
         INST_NAME(T1 "Z " T2);                              \
+        IFNATIVE(NF_EQ) {                                   \
+        GO( , cNE, cEQ, X_ZF)                               \
+        } else {                                            \
         GO( TSTw_mask(xFlags, 0b011010, 0)                  \
             , cEQ, cNE, X_ZF)                               \
+        }                                                   \
         break;                                              \
     case B+0x5:                                             \
         INST_NAME(T1 "NZ " T2);                             \
+        IFNATIVE(NF_EQ) {                                   \
+        GO( , cEQ, cNE, X_ZF)                               \
+        } else {                                            \
         GO( TSTw_mask(xFlags, 0b011010, 0)                  \
             , cNE, cEQ, X_ZF)                               \
+        }                                                   \
         break;                                              \
     case B+0x6:                                             \
         INST_NAME(T1 "BE " T2);                             \
@@ -1702,13 +1730,21 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
         break;                                              \
     case B+0x8:                                             \
         INST_NAME(T1 "S " T2);                              \
+        IFNATIVE(NF_SF) {                                   \
+        GO( , cPL, cMI, X_SF)                               \
+        } else {                                            \
         GO( TSTw_mask(xFlags, 0b011001, 0)                  \
             , cEQ, cNE, X_SF)                               \
+        }                                                   \
         break;                                              \
     case B+0x9:                                             \
         INST_NAME(T1 "NS " T2);                             \
+        IFNATIVE(NF_SF) {                                   \
+        GO( , cMI, cPL, X_SF)                               \
+        } else {                                            \
         GO( TSTw_mask(xFlags, 0b011001, 0)                  \
             , cNE, cEQ, X_SF)                               \
+        }                                                   \
         break;                                              \
     case B+0xA:                                             \
         INST_NAME(T1 "P " T2);                              \
@@ -1722,29 +1758,45 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
         break;                                              \
     case B+0xC:                                             \
         INST_NAME(T1 "L " T2);                              \
+        IFNATIVEN(NF_SF|NF_VF) {                            \
+        GO( , cGE, cLT, X_SF|X_OF)                          \
+        } else {                                            \
         GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF);    \
             TSTw_mask(x1, 0b010101, 0)                      \
             , cEQ, cNE, X_SF|X_OF)                          \
+        }                                                   \
         break;                                              \
     case B+0xD:                                             \
         INST_NAME(T1 "GE " T2);                             \
+        IFNATIVEN(NF_SF|NF_VF) {                            \
+        GO( , cLT, cGE, X_SF|X_OF)                          \
+        } else {                                            \
         GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF);    \
             TSTw_mask(x1, 0b010101, 0)                      \
             , cNE, cEQ, X_SF|X_OF)                          \
+        }                                                   \
         break;                                              \
     case B+0xE:                                             \
         INST_NAME(T1 "LE " T2);                             \
+        IFNATIVEN(NF_SF|NF_VF|NF_EQ) {                      \
+        GO( , cGT, cLE, X_SF|X_OF|X_ZF)                     \
+        } else {                                            \
         GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF);    \
             ORRw_REG_LSL(x1, x1, xFlags, F_OF-F_ZF);        \
             TSTw_mask(x1, 0b010101, 0)                      \
             , cEQ, cNE, X_SF|X_OF|X_ZF)                     \
+        }                                                   \
         break;                                              \
     case B+0xF:                                             \
         INST_NAME(T1 "G " T2);                              \
+        IFNATIVEN(NF_SF|NF_VF|NF_EQ) {                      \
+        GO( , cLE, cGT, X_SF|X_OF|X_ZF)                     \
+        } else {                                            \
         GO( EORw_REG_LSL(x1, xFlags, xFlags, F_OF-F_SF);    \
             ORRw_REG_LSL(x1, x1, xFlags, F_OF-F_ZF);        \
             TSTw_mask(x1, 0b010101, 0)                      \
             , cNE, cEQ, X_SF|X_OF|X_ZF)                     \
+        }                                                   \
         break
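
The GOCOND rewrite maps each x86 condition straight onto an ARM condition code whenever the flags it tests are native, which is why the signed comparisons need all of NF_SF|NF_VF (plus NF_EQ for LE/G): AArch64 defines LT/GE as N!=V / N==V and LE/GT as Z||N!=V / !Z&&N==V, matching the x86 definitions of L/GE and LE/G. A summary of the mapping used above, as a reading aid rather than new behavior:

    /* x86 cond -> ARM cond once ZF/SF/OF live in NZCV:
       O  -> VS    NO -> VC      needs NF_VF
       Z  -> EQ    NZ -> NE      needs NF_EQ
       S  -> MI    NS -> PL      needs NF_SF
       L  -> LT    GE -> GE      needs NF_SF|NF_VF        (L  = SF != OF)
       LE -> LE    G  -> GT      needs NF_SF|NF_VF|NF_EQ  (LE = ZF || SF != OF) */
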
 
 #define NOTEST(s1)                                          \
@@ -1775,18 +1827,24 @@ uintptr_t dynarec64_AVX_F3_0F38(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip
         if(arm64_flagm) {                       \
             SETF##A(s1);                        \
             IFX(X_ZF) {                         \
+                IFNATIVE(NF_EQ) {} else {       \
                 CSETw(s3, cEQ);                 \
                 BFIw(xFlags, s3, F_ZF, 1);      \
+                }                               \
             }                                   \
             IFX(X_SF) {                         \
+                IFNATIVE(NF_SF) {} else {       \
                 CSETw(s3, cMI);                 \
                 BFIw(xFlags, s3, F_SF, 1);      \
+                }                               \
             }                                   \
         } else {                                \
             IFX(X_ZF) {                         \
                 ANDSw_mask(s1, s1, 0, (A)-1);   \
+                IFNATIVE(NF_EQ) {} else {       \
                 CSETw(s3, cEQ);                 \
                 BFIw(xFlags, s3, F_ZF, 1);      \
+                }                               \
             }                                   \
             IFX(X_SF) {                         \
                 LSRw(s3, s1, (A)-1);            \
diff --git a/src/dynarec/arm64/dynarec_arm64_pass0.h b/src/dynarec/arm64/dynarec_arm64_pass0.h
index a9dd57e5..d03223ed 100644
--- a/src/dynarec/arm64/dynarec_arm64_pass0.h
+++ b/src/dynarec/arm64/dynarec_arm64_pass0.h
@@ -30,6 +30,8 @@
 #define INST_EPILOG                             \
         dyn->insts[ninst].f_exit = dyn->f;      \
         dyn->insts[ninst].n = dyn->n;           \
+        if(dyn->insts[ninst].nat_flags_op==NAT_FLAG_OP_TOUCH && !dyn->insts[ninst].set_nat_flags)       \
+                dyn->insts[ninst].nat_flags_op=NAT_FLAG_OP_UNUSABLE;                                    \
         dyn->insts[ninst].x64.has_next = (ok>0)?1:0;
 #define INST_NAME(name) 
 #define DEFAULT                         \
@@ -47,3 +49,14 @@
         printFunctionAddr(ip, " => ");  \
         dynarec_log(LOG_NONE, "\n");    \
         }
+
+#define FEMIT(A)        dyn->insts[ninst].nat_flags_op = dyn->insts[ninst].x64.set_flags?NAT_FLAG_OP_TOUCH:NAT_FLAG_OP_UNUSABLE
+#define IFNATIVE(A)     if(mark_natflag(dyn, ninst, A))
+#define IFNATIVEN(A)    if(mark_natflag(dyn, ninst, A))
+#define IFX(A)  if((dyn->insts[ninst].x64.set_flags&(A)))
+#define IFX2(A, B)  if((dyn->insts[ninst].x64.set_flags&(A)) B)
+#define IFX_PENDOR0  if((dyn->insts[ninst].x64.set_flags&(X_PEND) || !dyn->insts[ninst].x64.set_flags))
+#define IFXX(A) if((dyn->insts[ninst].x64.set_flags==(A)))
+#define IFX2X(A, B) if((dyn->insts[ninst].x64.set_flags==(A) || dyn->insts[ninst].x64.set_flags==(B) || dyn->insts[ninst].x64.set_flags==((A)|(B))))
+#define IFXN(A, B)  if((dyn->insts[ninst].x64.set_flags&(A) && !(dyn->insts[ninst].x64.set_flags&(B))))
+#define IFXNATIVE(X, N)  if((dyn->insts[ninst].x64.set_flags&(X)) && mark_natflag(dyn, ninst, N))
\ No newline at end of file
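
Pass 0 deliberately redefines FEMIT and the IFX/IFNATIVE family away from their emitting forms: instead of producing code they record, per instruction, which ops touch NZCV and which x86 flags are wanted, so updateNatveFlags can decide before pass 1 whether a native chain is viable. A condensed sketch of the multi-pass macro trick, assuming the usual STEP re-inclusion scheme (these are not the literal definitions):

    #if STEP == 0
      /* analysis pass: record intent, emit nothing */
      #define FEMIT(A)     dyn->insts[ninst].nat_flags_op = NAT_FLAG_OP_TOUCH
      #define IFNATIVE(A)  if (mark_natflag(dyn, ninst, A))  /* always records */
    #else
      /* emitting passes: act on the decision made after pass 0 */
      #define FEMIT(A)     EMIT(A)
      #define IFNATIVE(A)  if (dyn->insts[ninst].need_nat_flags & (A))
    #endif
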
diff --git a/src/dynarec/arm64/dynarec_arm64_private.h b/src/dynarec/arm64/dynarec_arm64_private.h
index 840fb47f..1ea9f658 100644
--- a/src/dynarec/arm64/dynarec_arm64_private.h
+++ b/src/dynarec/arm64/dynarec_arm64_private.h
@@ -9,6 +9,19 @@ typedef struct instsize_s instsize_t;
 
 #define BARRIER_MAYBE   8
 
+#define NF_EQ   (1<<0)
+#define NF_SF   (1<<1)
+#define NF_VF   (1<<2)
+
+// Nothing happens to the native flags
+#define NAT_FLAG_OP_NONE        0
+// Native flags are touched on this opcode
+#define NAT_FLAG_OP_TOUCH       1
+// Native flags are destroyed and unusable
+#define NAT_FLAG_OP_UNUSABLE    2
+// Native flags usage is canceled here
+#define NAT_FLAG_OP_CANCELED    3
+
 #define NEON_CACHE_NONE     0
 #define NEON_CACHE_ST_D     1
 #define NEON_CACHE_ST_F     2
@@ -94,6 +107,11 @@ typedef struct instruction_arm64_s {
     uint8_t             barrier_maybe;
     uint8_t             will_write;
     uint8_t             last_write;
+    uint8_t             set_nat_flags;  // 0 or a combination of the native flag defines
+    uint8_t             use_nat_flags;  // 0 or a combination of the native flag defines
+    uint8_t             nat_flags_op;   // what happens to the native flags here
+    uint8_t             before_nat_flags;  // 0 or a combination of the native flag defines
+    uint8_t             need_nat_flags;
     flagcache_t         f_exit;     // flags status at end of instruction
     neoncache_t         n;          // neoncache at end of instruction (but before poping)
     flagcache_t         f_entry;    // flags status before the instruction begin
@@ -154,9 +172,11 @@ void CreateJmpNext(void* addr, void* next);
 #define GO_TRACE(A, B, s0)  \
     GETIP(addr);            \
     MOVx_REG(x1, xRIP);     \
+    MRS_nzvc(s0);           \
     STORE_XEMU_CALL(xRIP);  \
     MOV32w(x2, B);          \
-    CALL(A, -1);            \
+    CALL_(A, -1, s0);       \
+    MSR_nzvc(s0);           \
     LOAD_XEMU_CALL(xRIP)
 
 #endif //__DYNAREC_ARM_PRIVATE_H_
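
GO_TRACE now brackets the trace call with MRS_nzvc/MSR_nzvc because NZCV may be carrying live x86 flag state when tracing is enabled, and the C helper would clobber it. The reasoning, assuming the callee follows standard AAPCS64 (which makes no promise about preserving NZCV):

    /* AAPCS64 treats NZCV as caller-saved: any C call may trash it.
       So every excursion into C while x86 flags are cached in NZCV does:

           MRS  s0, NZCV      ; save the cached ZF/SF/OF
           BL   helper        ; arbitrary C code, flags clobbered
           MSR  NZCV, s0      ; restore before JIT'd code tests them   */
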
diff --git a/src/dynarec/dynarec_arch.h b/src/dynarec/dynarec_arch.h
index c9de4b8f..3790a5db 100644
--- a/src/dynarec/dynarec_arch.h
+++ b/src/dynarec/dynarec_arch.h
@@ -9,14 +9,19 @@
 #define ADDITIONNAL_DEFINITION()  \
     int fpuCacheNeedsTransform(dynarec_native_t* dyn, int ninst);
 
-#define OTHER_CACHE()   \
-    if (fpuCacheNeedsTransform(dyn, ninst)) ret|=2;
+#define OTHER_CACHE()                                   \
+    if (fpuCacheNeedsTransform(dyn, ninst)) ret|=2;     \
+    if (nativeFlagsNeedsTransform(dyn, ninst)) ret|=4;
 
 #include "arm64/arm64_printer.h"
 #include "arm64/dynarec_arm64_private.h"
 #include "arm64/dynarec_arm64_functions.h"
 // Limit here is defined by LD litteral, that is 19bits
 #define MAXBLOCK_SIZE ((1<<19)-200)
+
+#define RAZ_SPECIFIC(A, N)      rasNativeState(A, N)
+#define UPDATE_SPECIFICS(A)     updateNatveFlags(A)
+
 #elif defined(LA64)
 
 #define instruction_native_t        instruction_la64_t
@@ -33,6 +38,9 @@
 #include "la64/dynarec_la64_functions.h"
 // Limit here is unconditionnal jump, that is signed 28bits
 #define MAXBLOCK_SIZE ((1 << 27) - 200)
+
+#define RAZ_SPECIFIC(A, N)
+#define UPDATE_SPECIFICS(A)
 #elif defined(RV64)
 
 #define instruction_native_t        instruction_rv64_t
@@ -51,6 +59,9 @@
 #include "rv64/dynarec_rv64_functions.h"
 // Limit here is unconditionnal jump, that is signed 21bits
 #define MAXBLOCK_SIZE ((1<<20)-200)
+
+#define RAZ_SPECIFIC(A, N)
+#define UPDATE_SPECIFICS(A)
 #else
 #error Unsupported platform
 #endif
diff --git a/src/dynarec/dynarec_native.c b/src/dynarec/dynarec_native.c
index dc0e1830..e42b98f2 100644
--- a/src/dynarec/dynarec_native.c
+++ b/src/dynarec/dynarec_native.c
@@ -664,6 +664,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
             while(ii<helper.size && !helper.insts[ii].pred_sz) {
                 fpu_reset_ninst(&helper, ii);
                 helper.insts[ii].ymm0_in = helper.insts[ii].ymm0_sub = helper.insts[ii].ymm0_add = helper.insts[ii].ymm0_out = helper.insts[ii].purge_ymm = 0;
+                RAZ_SPECIFIC(&helper, ii);
                 ++ii;
             }
             i = ii;
@@ -680,6 +681,7 @@ void* FillBlock64(dynablock_t* block, uintptr_t addr, int alternate, int is32bit
         return CreateEmptyBlock(block, addr, is32bits);
     }
     updateYmm0s(&helper, 0, 0);
+    UPDATE_SPECIFICS(&helper);
 
 
     // pass 1, float optimizations, first pass for flags