author    ptitSeb <sebastien.chev@gmail.com>  2021-03-15 17:14:51 +0100
committer ptitSeb <sebastien.chev@gmail.com>  2021-03-15 17:14:51 +0100
commit    14ec226747d0e62d8be0684550859c2583eef99e
tree      ec10cbc874a5e54b7649f58a4ef6c048c6c7f63c /src
parent    aebad0cf18d8cd117af8e6bfb704d5abcc3bedc8
Added 0x29 opcode (SUB Ed, Gd)
Diffstat (limited to 'src')
-rwxr-xr-x  src/dynarec/arm64_emitter.h            |   40
-rwxr-xr-x  src/dynarec/dynarec_arm64_00.c         |    9
-rwxr-xr-x  src/dynarec/dynarec_arm64_emit_math.c  | 1863
-rwxr-xr-x  src/dynarec/dynarec_arm64_helper.h     |   47
4 files changed, 1941 insertions, 18 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index b75cd9af..46b14eb3 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -66,6 +66,26 @@
 #define xZR     31
 #define wZR     xZR
 
+// conditions
+#define cEQ 0b0000
+#define cNE 0b0001
+#define cCS 0b0010
+#define cHS cCS
+#define cCC 0b0011
+#define cLO cCC
+#define cMI 0b0100
+#define cPL 0b0101
+#define cVS 0b0110
+#define cVC 0b0111
+#define cHI 0b1000
+#define cLS 0b1001
+#define cGE 0b1010
+#define cLT 0b1011
+#define cGT 0b1100
+#define cLE 0b1101
+#define c__ 0b1110
+
+
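These condition encodings follow the AArch64 convention that flipping the low bit
of a code inverts it (cEQ^1 == cNE, cCS^1 == cCC, ...), except for the two
"always" encodings 0b1110/0b1111. A minimal sketch of a helper built on that
property (illustrative, not part of the patch):

    // hypothetical helper; assumes cond is not c__ ("always")
    static inline int invert_cond(int cond) {
        return cond ^ 1;    // cEQ<->cNE, cCS<->cCC, cGE<->cLT, ...
    }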
 // MOVZ
 #define MOVZ_gen(sf, hw, imm16, Rd)         ((sf)<<31 | 0b10<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd))
 #define MOVZx(Rd, imm16)                    EMIT(MOVZ_gen(1, 0, (imm16)&0xffff, Rd))
@@ -117,6 +137,7 @@
 #define SUBw_REG(Rd, Rn, Rm)                EMIT(ADDSUB_REG_gen(0, 1, 0, 0b00, Rm, 0, Rn, Rd))
 #define SUBSw_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(0, 1, 1, 0b00, Rm, 0, Rn, Rd))
 #define SUBxw_REG(Rd, Rn, Rm)               EMIT(ADDSUB_REG_gen(rex.w, 1, 0, 0b00, Rm, 0, Rn, Rd))
+#define SUBSxw_REG(Rd, Rn, Rm)              EMIT(ADDSUB_REG_gen(rex.w, 1, 1, 0b00, Rm, 0, Rn, Rd))
 
 #define SUBx_U12(Rd, Rn, imm12)     EMIT(ADDSUB_IMM_gen(1, 1, 0, 0, (imm12)&0xfff, Rn, Rd))
 #define SUBSx_U12(Rd, Rn, imm12)    EMIT(ADDSUB_IMM_gen(1, 1, 1, 0, (imm12)&0xfff, Rn, Rd))
@@ -189,6 +210,9 @@
 #define CBZx(Rt, imm19)                 EMIT(CB_gen(1, 0, ((imm19)>>2)&0x7ffff, Rt))
 #define CBZw(Rt, imm19)                 EMIT(CB_gen(0, 0, ((imm19)>>2)&0x7ffff, Rt))
 
+#define Bcond_gen(imm19, cond)          (0b0101010<<25 | (imm19)<<5 | (cond))
+#define Bcond(cond, imm19)              EMIT(Bcond_gen(((imm19)>>2)&0x7ffff, cond))
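B.cond offsets are byte offsets relative to the branch instruction itself, so
Bcond(cond, +8) skips exactly one following instruction, while +4 just falls
through to the next one. A quick sanity check of the encoding (illustrative):

    // B.NE +8 -> 0b0101010<<25 | ((8>>2)&0x7ffff)<<5 | cNE == 0x54000041
    uint32_t word = Bcond_gen((8 >> 2) & 0x7ffff, cNE);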
+
 // AND / ORR
 #define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd)  ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
 #define ANDx_U13(Rd, Rn, imm13)         EMIT(LOGIC_gen(1, 0b00, ((imm13)>>12)&1, (imm13)&0b111111, ((imm13)>>6)&0b111111, Rn, Rd))
@@ -201,6 +225,7 @@
 #define LOGIC_REG_gen(sf, opc, shift, N, Rm, imm6, Rn, Rd)    ((sf)<<31 | (opc)<<29 | 0b01010<<24 | (shift)<<22 | (N)<<21 | (Rm)<<16 | (imm6)<<10 | (Rn)<<5 | (Rd))
 #define ANDx_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
 #define ANDw_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
+#define ANDxw_REG(Rd, Rn, Rm)           EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 0, Rm, 0, Rn, Rd))
 #define ANDSx_REG(Rd, Rn, Rm)           EMIT(LOGIC_REG_gen(1, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
 #define ANDSw_REG(Rd, Rn, Rm)           EMIT(LOGIC_REG_gen(0, 0b11, 0b00, 0, Rm, 0, Rn, Rd))
 #define ORRx_REG(Rd, Rn, Rm)            EMIT(LOGIC_REG_gen(1, 0b01, 0b00, 0, Rm, 0, Rn, Rd))
@@ -220,15 +245,28 @@
 #define MVNxw(Rd, Rm)                   ORNxw_REG(Rd, xZR, Rm)
 #define MOV_frmSP(Rd)                   ADDx_U12(Rd, xSP, 0)
 #define MOV_toSP(Rm)                    ADDx_U12(xSP, Rm, 0)
+#define BICx(Rd, Rn, Rm)                EMIT(LOGIC_REG_gen(1, 0b00, 0b00, 1, Rm, 0, Rn, Rd))
+#define BICw(Rd, Rn, Rm)                EMIT(LOGIC_REG_gen(0, 0b00, 0b00, 1, Rm, 0, Rn, Rd))
+#define BICSx(Rd, Rn, Rm)               EMIT(LOGIC_REG_gen(1, 0b11, 0b00, 1, Rm, 0, Rn, Rd))
+#define BICSw(Rd, Rn, Rm)               EMIT(LOGIC_REG_gen(0, 0b11, 0b00, 1, Rm, 0, Rn, Rd))
+#define BICxw(Rd, Rn, Rm)               EMIT(LOGIC_REG_gen(rex.w, 0b00, 0b00, 1, Rm, 0, Rn, Rd))
+#define BICSxw(Rd, Rn, Rm)              EMIT(LOGIC_REG_gen(rex.w, 0b11, 0b00, 1, Rm, 0, Rn, Rd))
+#define BICx_REG    BICx
+#define BICw_REG    BICw
+#define BICxw_REG   BICxw
+
 
 // BFI
 #define BFM_gen(sf, opc, N, immr, imms, Rn, Rd) ((sf)<<31 | (opc)<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
 #define BFMx(Rd, Rn, immr, imms)        EMIT(BFM_gen(1, 0b01, 1, immr, imms, Rn, Rd))
 #define BFMw(Rd, Rn, immr, imms)        EMIT(BFM_gen(0, 0b01, 0, immr, imms, Rn, Rd))
+#define BFMxw(Rd, Rn, immr, imms)       EMIT(BFM_gen(rex.w, 0b01, rex.w, immr, imms, Rn, Rd))
 #define BFIx(Rd, Rn, lsb, width)        BFMx(Rd, Rn, ((-lsb)%64)&63, (width)-1)
 #define BFIw(Rd, Rn, lsb, width)        BFMw(Rd, Rn, ((-lsb)%32)&31, (width)-1)
+#define BFIxw(Rd, Rn, lsb, width)       BFMxw(Rd, Rn, ((-lsb)%(rex.w?64:32))&(rex.w?63:31), (width)-1)
 #define BFCx(Rd, Rn, lsb, width)        BFMx(Rd, xZR, ((-lsb)%64)&63, (width)-1)
 #define BFCw(Rd, Rn, lsb, width)        BFMw(Rd, xZR, ((-lsb)%32)&31, (width)-1)
+#define BFCxw(Rd, Rn, lsb, width)       BFMxw(Rd, xZR, ((-lsb)%(rex.w?64:32))&(rex.w?63:31), (width)-1)
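BFM/BFI deposits the low `width` bits of Rn into Rd starting at bit `lsb`, which
is how the emitters below write individual x86 flag bits into xFlags. A plain-C
model of the LSR+BFI pair (a sketch, not box64 API):

    static inline uint64_t bfi(uint64_t rd, uint64_t rn, int lsb, int width) {
        uint64_t mask = ((1ull << width) - 1) << lsb;   // field to replace (width < 64)
        return (rd & ~mask) | ((rn << lsb) & mask);     // deposit low bits of rn
    }
    // e.g. SF from bit 31 of a 32-bit result: flags = bfi(flags, res >> 31, F_SF, 1);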
 
 // UBFX
 #define UBFM_gen(sf, N, immr, imms, Rn, Rd)    ((sf)<<31 | 0b10<<29 | 0b100110<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | (Rd))
@@ -240,11 +278,13 @@
 #define UXTHw(Rd, Rn)                   EMIT(UBFM_gen(0, 1, 0, 15, Rn, Rd))
 #define LSRx(Rd, Rn, shift)             EMIT(UBFM_gen(1, 1, shift, 63, Rn, Rd))
 #define LSRw(Rd, Rn, shift)             EMIT(UBFM_gen(0, 0, shift, 31, Rn, Rd))
+#define LSRxw(Rd, Rn, shift)            EMIT(UBFM_gen(rex.w, rex.w, shift, (rex.w)?63:31, Rn, Rd))
 
 // LSRV
 #define LSRV_gen(sf, Rm, op2, Rn, Rd)   ((sf)<<31 | 0b11010110<<21 | (Rm)<<16 | 0b0010<<12 | (op2)<<10 | (Rn)<<5 | (Rd))
 #define LSRx_REG(Rd, Rn, Rm)            EMIT(LSRV_gen(1, Rm, 0b01, Rn, Rd))
 #define LSRw_REG(Rd, Rn, Rm)            EMIT(LSRV_gen(0, Rm, 0b01, Rn, Rd))
+#define LSRxw_REG(Rd, Rn, Rm)           EMIT(LSRV_gen(rex.w, Rm, 0b01, Rn, Rd))
 
 // MRS
 #define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt)  (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt))
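Throughout this header, the `xw` suffix marks macros that pick the operand size
from a `rex` variable expected to be in scope at the expansion site: 64-bit (`x`)
when rex.w is set, 32-bit (`w`) otherwise. An illustrative use inside an opcode
handler:

    LSRxw(x1, x1, 3);   // shifts all 64 bits of x1 if rex.w, else the low 32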
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index d88f7ac2..53b8f43e 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -57,6 +57,15 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
 
     switch(opcode) {
 
+        case 0x29:
+            INST_NAME("SUB Ed, Gd");
+            SETFLAGS(X_ALL, SF_SET);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_sub32(dyn, ninst, rex, ed, gd, x3, x4, x5);
+            WBACK;
+            break;
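GETGD and GETED decode the instruction's ModRM byte into gd (the x86 'G'
register) and ed (a register, or a scratch register holding a memory operand),
so emit_sub32 computes ed = ed - gd and WBACK writes a memory operand back. A
worked decode of one concrete encoding (illustrative):

    #include <stdint.h>
    // "29 C8" is SUB EAX, ECX; the ModRM byte 0xC8 splits as:
    uint8_t modrm = 0xC8;
    int mod = modrm >> 6;          // 3 -> rm names a register, no memory operand
    int reg = (modrm >> 3) & 7;    // 1 -> ECX is Gd (the source)
    int rm  = modrm & 7;           // 0 -> EAX is Ed (the destination)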
 
         case 0x50:
         case 0x51:
diff --git a/src/dynarec/dynarec_arm64_emit_math.c b/src/dynarec/dynarec_arm64_emit_math.c
new file mode 100755
index 00000000..bc06904f
--- /dev/null
+++ b/src/dynarec/dynarec_arm64_emit_math.c
@@ -0,0 +1,1863 @@
+#include <stdio.h>
+#include <stdlib.h>
+#include <stddef.h>
+#include <pthread.h>
+#include <errno.h>
+
+#include "debug.h"
+#include "box64context.h"
+#include "dynarec.h"
+#include "emu/x64emu_private.h"
+#include "emu/x64run_private.h"
+#include "x64run.h"
+#include "x64emu.h"
+#include "box64stack.h"
+#include "callback.h"
+#include "emu/x64run_private.h"
+#include "x64trace.h"
+#include "dynarec_arm64.h"
+#include "dynarec_arm64_private.h"
+#include "arm64_printer.h"
+#include "../tools/bridge_private.h"
+
+#include "dynarec_arm64_functions.h"
+#include "dynarec_arm64_helper.h"
+
+// emit ADD32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
+//void emit_add32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_add32);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF) {
+//        ORR_REG_LSL_IMM5(s3, s1, s2, 0);    // s3 = op1 | op2
+//        AND_REG_LSL_IMM5(s4, s1, s2, 0);    // s4 = op1 & op2
+//    }
+//    IFX(X_ALL) {
+//        ADDS_REG_LSL_IMM5(s1, s1, s2, 0);
+//    } else {
+//        ADD_REG_LSL_IMM5(s1, s1, s2, 0);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF) {
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        MOV_REG_LSR_IMM5(s4, s3, 3);
+//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//    }
+//    IFX(X_ZF|X_CF) {
+//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_CF) {
+//        ORR_IMM8_COND(cCS, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_OF) {
+//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
+//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit ADD32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
+//void emit_add32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
+//{
+//    if(s1==xESP && (!dyn->insts || dyn->insts[ninst].x64.need_flags==X_PEND))
+//    {
+//        // special case when doing math on ESP and only PEND is needed: ignoring it!
+//        if(c>=0 && c<256) {
+//            ADD_IMM8(s1, s1, c);
+//        } else {
+//            MOV32(s3, c);
+//            ADD_REG_LSL_IMM5(s1, s1, s3, 0);
+//        }
+//        return;
+//    }
+//    IFX(X_PEND) {
+//        MOV32(s3, c);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s4, d_add32);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s4);
+//    }
+//    IFX(X_AF) {
+//        if(c>=0 && c<256) {
+//            ORR_IMM8(s3, s1, c, 0);             // s3 = op1 | op2
+//            AND_IMM8(s4, s1, c);                // s4 = op1 & op2
+//        } else {
+//            IFX(X_PEND) {} else {MOV32(s3, c);}
+//            MOV_REG(s4, s3);
+//            ORR_REG_LSL_IMM5(s3, s1, s3, 0);
+//            AND_REG_LSL_IMM5(s4, s1, s4, 0);
+//            PUSH(xSP, 1<<s3);
+//        }
+//    }
+//    if(c>=0 && c<256) {
+//        IFX(X_ALL) {
+//            ADDS_IMM8(s1, s1, c);
+//        } else {
+//            ADD_IMM8(s1, s1, c);
+//        }
+//    } else {
+//        IFXN(X_PEND, X_AF) {} else {MOV32(s3, c);}
+//        IFX(X_ALL) {
+//            ADDS_REG_LSL_IMM5(s1, s1, s3, 0);
+//        } else {
+//            ADD_REG_LSL_IMM5(s1, s1, s3, 0);
+//        }
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF) {
+//        if(c<0 || c>=256) {
+//            POP(xSP, 1<<s3);
+//        }
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        MOV_REG_LSR_IMM5(s4, s3, 3);
+//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//    }
+//    IFX(X_ZF|X_CF) {
+//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_CF) {
+//        ORR_IMM8_COND(cCS, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_OF) {
+//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
+//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit SUB32 instruction (SUB64 when rex.w), from s1, s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
+{
+    IFX(X_PEND) {
+        STRw_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRw_U12(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_sub32);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s3);
+    }
+    IFX(X_AF) {
+        MVNxw(s3, s1);
+        ORRxw_REG(s3, s3, s2);  // s3 = ~op1 | op2
+        BICxw(s4, s2, s1);      // s4 = ~op1 & op2
+    }
+    IFX(X_ALL) {
+        SUBSxw_REG(s1, s1, s2);
+    } else {
+        SUBxw_REG(s1, s1, s2);
+    }
+    IFX(X_PEND) {
+        STRx_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_AF) {
+        ANDxw_REG(s3, s3, s1);   // s3 = (~op1 | op2) & res
+    ORRxw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+        LSRxw(s4, s3, 3);
+        BFIx(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+    }
+    IFX(X_ZF|X_CF|X_OF) {
+        MOVw(s5, (1<<F_ZF)|(1<<F_CF)|(1<<F_OF));
+        BICx(xFlags, xFlags, s5);
+    }
+    IFX(X_ZF) {
+        Bcond(cNE, +8);     // +8 skips the single ORR below (offset is relative to this branch)
+        ORRw_U12(xFlags, xFlags, 1<<F_ZF);
+    }
+    IFX(X_CF) {
+        // inverted carry
+        Bcond(cCS, +8);     // ARM carry set = no borrow, so x86 CF stays clear
+        ORRw_U12(xFlags, xFlags, 1<<F_CF);
+    }
+    IFX(X_OF) {
+        Bcond(cVC, +8);     // no overflow: skip setting OF
+        ORRw_U12(xFlags, xFlags, 1<<F_OF);
+    }
+    IFX(X_SF) {
+        LSRxw(s3, s1, (rex.w)?63:31);
+        BFIx(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
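The flag computation above leans on the classic borrow-chain identity: for
res = op1 - op2, the word bc = (~op1 & op2) | ((~op1 | op2) & res) holds the
borrow out of every bit position, so AF is bc bit 3 and OF is the XOR of the
borrows into and out of the sign bit. A plain-C reference model of the 32-bit
case (a sketch mirroring the comments; names are illustrative):

    #include <stdint.h>
    typedef struct { int cf, af, zf, sf, of, pf; } flags_t;
    static flags_t sub32_flags(uint32_t op1, uint32_t op2) {
        uint32_t res = op1 - op2;
        uint32_t bc  = (~op1 & op2) | ((~op1 | op2) & res);  // borrow chain
        flags_t f;
        f.cf = (bc >> 31) & 1;                    // borrow out of bit 31
        f.af = (bc >> 3) & 1;                     // borrow out of bit 3
        f.of = ((bc >> 30) ^ (bc >> 31)) & 1;     // borrow into vs out of the sign bit
        f.zf = (res == 0);
        f.sf = (res >> 31) & 1;
        f.pf = !__builtin_parity(res & 0xff);     // x86 PF: even parity of low byte
        return f;
    }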
+
+// emit SUB32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
+//void emit_sub32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
+//{
+//    if(s1==xESP && (!dyn->insts || dyn->insts[ninst].x64.need_flags==X_PEND))
+//    {
+//        // special case when doing math on ESP and only PEND is needed: ignoring it!
+//        if(c>=0 && c<256) {
+//            SUB_IMM8(s1, s1, c);
+//        } else {
+//            MOV32(s3, c);
+//            SUB_REG_LSL_IMM5(s1, s1, s3, 0);
+//        }
+//        return;
+//    }
+//    IFX(X_PEND) {
+//        MOV32(s3, c);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s4, d_sub32);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s4);
+//    }
+//    IFX(X_AF) {
+//        if(c>=0 && c<256) {
+//            MVN_REG_LSL_IMM5(s3, s1, 0);
+//            AND_IMM8(s4, s3, c);                // s4 = ~op1 & op2
+//            ORR_IMM8(s3, s3, c, 0);             // s3 = ~op1 | op2
+//        } else {
+//            IFX(X_PEND) {} else {MOV32(s3, c);}
+//            MVN_REG_LSL_IMM5(s4, s1, 0);
+//            ORR_REG_LSL_IMM5(s3, s4, s3, 0);
+//            MOV32(s4, c);
+//            BIC_REG_LSL_IMM5(s4, s4, s1, 0);
+//            PUSH(xSP, 1<<s3);
+//        }
+//    }
+//    if(c>=0 && c<256) {
+//        IFX(X_ALL) {
+//            SUBS_IMM8(s1, s1, c);
+//        } else {
+//            SUB_IMM8(s1, s1, c);
+//        }
+//    } else {
+//        IFXN(X_PEND, X_AF) {} else {MOV32(s3, c);}
+//        IFX(X_ALL) {
+//            SUBS_REG_LSL_IMM5(s1, s1, s3, 0);
+//        } else {
+//            SUB_REG_LSL_IMM5(s1, s1, s3, 0);
+//        }
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF) {
+//        if(c<0 || c>=256) {
+//            POP(xSP, 1<<s3);
+//        }
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        MOV_REG_LSR_IMM5(s4, s3, 3);
+//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//    }
+//    IFX(X_ZF|X_CF) {
+//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_CF) {
+//        // Inverted carry
+//        ORR_IMM8_COND(cCC, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_OF) {
+//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
+//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit ADD8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, with save_s4 set if s4 needs to be saved
+//void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_add8);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_PF){if(save_s4) {PUSH(xSP, 1<<s4);}}
+//    IFX(X_AF | X_OF) {
+//        ORR_REG_LSL_IMM5(s3, s1, s2, 0);    // s3 = op1 | op2
+//        AND_REG_LSL_IMM5(s4, s1, s2, 0);    // s4 = op1 & op2
+//    }
+//    ADD_REG_LSL_IMM5(s1, s1, s2, 0);
+//    IFX(X_AF|X_OF) {
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//    IFX(X_CF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 8);
+//        BFI(xFlags, s3, F_CF, 1);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//    IFX(X_AF|X_OF|X_PF){if(save_s4) {POP(xSP, 1<<s4);}}
+//}
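The commented-out ARM32 bodies above use the addition counterpart of the same
trick: cc = (op1 & op2) | ((op1 | op2) & ~res) is the carry out of every bit,
so for 8-bit operands AF is cc bit 3, CF is cc bit 7 (equivalently bit 8 of the
widened result), and OF is cc bit 6 XOR cc bit 7. A one-function sketch
(illustrative, not box64 code):

    static inline uint32_t add8_carry_chain(uint32_t op1, uint32_t op2) {
        uint32_t res = op1 + op2;                      // op1, op2 hold 8-bit values
        return (op1 & op2) | ((op1 | op2) & ~res);     // cc: AF=bit3, CF=bit7, OF=bit6^bit7
    }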
+
+// emit ADD8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch
+//void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        MOVW(s3, c&0xff);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_add8);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF | X_OF) {
+//        ORR_IMM8(s3, s1, c, 0);     // s3 = op1 | op2
+//        AND_IMM8(s4, s1, c);        // s4 = op1 & op2
+//    }
+//    ADD_IMM8(s1, s1, c);
+//
+//    IFX(X_AF|X_OF) {
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//    IFX(X_CF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 8);
+//        BFI(xFlags, s3, F_CF, 1);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit SUB8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, with save_s4 set if s4 needs to be saved
+//void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_sub8);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {PUSH(xSP, 1<<s4);}}
+//    IFX(X_AF|X_OF|X_CF) {
+//        MVN_REG_LSL_IMM5(s3, s1, 0);
+//        ORR_REG_LSL_IMM5(s3, s3, s2, 0);    // s3 = ~op1 | op2
+//        BIC_REG_LSL_IMM5(s4, s2, s1, 0);    // s4 = ~op1 & op2
+//    }
+//
+//    SUB_REG_LSL_IMM5(s1, s1, s2, 0);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        IFX(X_CF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 7);
+//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
+//        }
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {POP(xSP, 1<<s4);}}
+//}
+
+// emit SUB8 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
+//void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        MOVW(s3, c&0xff);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_sub8);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        MVN_REG_LSL_IMM5(s3, s1, 0);
+//        MOVW(s4, c&0xff);
+//        ORR_IMM8(s3, s3, c, 0);             // s3 = ~op1 | op2
+//        BIC_REG_LSL_IMM5(s4, s4, s1, 0);    // s4 = ~op1 & op2
+//    }
+//    SUB_IMM8(s1, s1, c);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        IFX(X_CF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 7);
+//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
+//        }
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit ADD16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, with save_s4 set if s4 needs to be saved
+//void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_add16);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_PF){if(save_s4) {PUSH(xSP, 1<<s4);}}
+//    IFX(X_AF | X_OF) {
+//        ORR_REG_LSL_IMM5(s3, s1, s2, 0);    // s3 = op1 | op2
+//        AND_REG_LSL_IMM5(s4, s1, s2, 0);    // s4 = op1 & op2
+//    }
+//    ADD_REG_LSL_IMM5(s1, s1, s2, 0);
+//
+//    IFX(X_AF|X_OF) {
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_CF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 16);
+//        BFI(xFlags, s3, F_CF, 1);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//    IFX(X_AF|X_OF|X_PF){if(save_s4) {POP(xSP, 1<<s4);}}
+//}
+
+// emit ADD16 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch
+//void emit_add16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        MOVW(s3, c);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s4, d_add16);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s4);
+//    }
+//    IFX(X_AF | X_OF) {
+//        MOV_REG(s4, s1);
+//    }
+//    if(c>=0 && c<256) {
+//        ADD_IMM8(s1, s1, c);
+//    } else {
+//        IFX(X_PEND) {} else {MOVW(s3, c);}
+//        ADD_REG_LSL_IMM5(s1, s1, s3, 0);
+//    }
+//
+//    IFX(X_AF|X_OF) {
+//        if(c>=0 && c<256) {
+//            ORR_IMM8(s3, s4, c, 0);     // s3 = op1 | op2
+//            AND_IMM8(s4, s4, c);        // s4 = op1 & op2
+//        } else {
+//            ORR_REG_LSL_IMM5(s3, s3, s4, 0);    // s3 = op1 | op2
+//            PUSH(xSP, 1<<s3);
+//            MOVW(s3, c);
+//            AND_REG_LSL_IMM5(s4, s4, s3, 0);    // s4 = op1 & op2
+//            POP(xSP, 1<<s3);
+//        }
+//
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_CF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 16);
+//        BFI(xFlags, s3, F_CF, 1);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit SUB16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, with save_s4 set if s4 needs to be saved
+//void emit_sub16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_sub16);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {PUSH(xSP, 1<<s4);}}
+//    IFX(X_AF|X_OF|X_CF) {
+//        MVN_REG_LSL_IMM5(s3, s1, 0);
+//        ORR_REG_LSL_IMM5(s3, s3, s2, 0);    // s3 = ~op1 | op2
+//        BIC_REG_LSL_IMM5(s4, s2, s1, 0);    // s4 = ~op1 & op2
+//    }
+//
+//    SUB_REG_LSL_IMM5(s1, s1, s2, 0);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        IFX(X_CF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 15);
+//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x8000
+//        }
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {POP(xSP, 1<<s4);}}
+//}
+
+// emit SUB16 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
+//void emit_sub16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        MOVW(s3, c);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s4, d_sub16);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s4);
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        MVN_REG_LSL_IMM5(s4, s1, 0);
+//    }
+//    if(c>=0 && c<256) {
+//        SUB_IMM8(s1, s1, c);
+//    } else {
+//        IFX(X_PEND) {} else {MOVW(s3, c);}
+//        SUB_REG_LSL_IMM5(s1, s1, s3, 0);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        if(c>=0 && c<256) {
+//            ORR_IMM8(s3, s4, c, 0);     // s3 = ~op1 | op2
+//            AND_IMM8(s4, s4, c);        // s4 = ~op1 & op2
+//        } else {
+//            ORR_REG_LSL_IMM5(s3, s3, s4, 0);    // s3 = ~op1 | op2
+//            PUSH(xSP, 1<<s3);
+//            MOVW(s3, c);
+//            AND_REG_LSL_IMM5(s4, s4, s3, 0);    // s4 = ~op1 & op2
+//            POP(xSP, 1<<s3);
+//        }
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        IFX(X_CF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 15);
+//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x8000
+//        }
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit INC32 instruction, from s1, store result in s1 using s3 and s4 as scratch
+//void emit_inc32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        SET_DF(s4, d_inc32);
+//    } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) {
+//        SET_DFNONE(s4);
+//    }
+//    IFX(X_AF) {
+//        ORR_IMM8(s3, s1, 1, 0);             // s3 = op1 | op2
+//        AND_IMM8(s4, s1, 1);                // s4 = op1 & op2
+//    }
+//    IFX(X_ZF|X_OF) {
+//        ADDS_IMM8(s1, s1, 1);
+//    } else {
+//        ADD_IMM8(s1, s1, 1);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF) {
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        MOV_REG_LSR_IMM5(s4, s3, 3);
+//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_OF) {
+//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
+//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit INC8 instruction, from s1, store result in s1 using s3 and s4 as scratch
+//void emit_inc8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        SET_DF(s3, d_inc8);
+//    } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF | X_OF) {
+//        ORR_IMM8(s3, s1, 1, 0);     // s3 = op1 | op2
+//        AND_IMM8(s4, s1, 1);        // s4 = op1 & op2
+//    }
+//    ADD_IMM8(s1, s1, 1);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF) {
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit INC16 instruction, from s1, store result in s1 using s3 and s4 as scratch
+//void emit_inc16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        SET_DF(s3, d_inc16);
+//    } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF | X_OF) {
+//        MOV_REG(s4, s1);
+//    }
+//    ADD_IMM8(s1, s1, 1);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF) {
+//        ORR_IMM8(s3, s4, 1, 0);     // s3 = op1 | op2
+//        AND_IMM8(s4, s4, 1);        // s4 = op1 & op2
+//
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit DEC32 instruction, from s1, store result in s1 using s3 and s4 as scratch
+//void emit_dec32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        SET_DF(s4, d_dec32);
+//    } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) {
+//        SET_DFNONE(s4);
+//    }
+//    IFX(X_AF) {
+//        MVN_REG_LSL_IMM5(s3, s1, 0);
+//        AND_IMM8(s4, s3, 1);                // s4 = ~op1 & op2
+//        ORR_IMM8(s3, s3, 1, 0);             // s3 = ~op1 | op2
+//    }
+//    IFX(X_ZF|X_OF) {
+//        SUBS_IMM8(s1, s1, 1);
+//    } else {
+//        SUB_IMM8(s1, s1, 1);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF) {
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        MOV_REG_LSR_IMM5(s4, s3, 3);
+//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_OF) {
+//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
+//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit DEC8 instruction, from s1, store result in s1 using s3 and s4 as scratch
+//void emit_dec8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_dec8);
+//    } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF) {
+//        MVN_REG_LSL_IMM5(s3, s1, 0);
+//        AND_IMM8(s4, s3, 1);        // s4 = ~op1 & op2
+//        ORR_IMM8(s3, s3, 1, 0);     // s3 = ~op1 | op2
+//    }
+//    SUB_IMM8(s1, s1, 1);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF) {
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit DEC16 instruction, from s1, store result in s1 using s3 and s4 as scratch
+//void emit_dec16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        SET_DF(s3, d_dec16);
+//    } else IFX(X_ZF|X_OF|X_AF|X_SF|X_PF) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF) {
+//        MVN_REG_LSL_IMM5(s4, s1, 0);
+//    }
+//    SUB_IMM8(s1, s1, 1);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF) {
+//        ORR_IMM8(s3, s4, 1, 0);     // s3 = ~op1 | op2
+//        AND_IMM8(s4, s4, 1);        // s4 = ~op1 & op2
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
+//void emit_adc32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_adc32);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF) {
+//        MOV_REG(s4, s1);
+//    }
+//    MOVS_REG_LSR_IMM5(s3, xFlags, 1);    // load CC (F_CF==0) into ARM CF
+//    IFX(X_ZF|X_CF|X_OF) {
+//        ADCS_REG_LSL_IMM5(s1, s1, s2, 0);
+//    } else {
+//        ADC_REG_LSL_IMM5(s1, s1, s2, 0);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF) {
+//        ORR_REG_LSL_IMM5(s3, s4, s2, 0);    // s3 = op1 | op2
+//        AND_REG_LSL_IMM5(s4, s4, s2, 0);    // s4 = op1 & op2
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        MOV_REG_LSR_IMM5(s4, s3, 3);
+//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_CF) {
+//        ORR_IMM8_COND(cCS, xFlags, xFlags, 1<<F_CF, 0);
+//        BIC_IMM8_COND(cCC, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_OF) {
+//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
+//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
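The "MOVS s3, xFlags, LSR #1" line in these ARM32 bodies is the carry-loading
trick: with F_CF == 0, x86 CF lives in bit 0 of xFlags, and an ARM32
flag-setting LSR #1 moves the last bit shifted out into the ARM carry, right
where ADC reads it. A plain-C model of that shift's carry semantics
(illustrative):

    static inline uint32_t movs_lsr1(uint32_t xflags, int *arm_carry) {
        *arm_carry = xflags & 1;   // the bit shifted out becomes the ARM C flag
        return xflags >> 1;        // shifted value (the destination is a throwaway)
    }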
+
+// emit ADC32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
+//void emit_adc32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        MOV32(s3, c);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s4, d_adc32);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s4);
+//    }
+//    IFX(X_AF) {
+//        MOV_REG(s4, s1);
+//    }
+//    MOVS_REG_LSR_IMM5(s3, xFlags, 1);    // load CC into ARM CF
+//    if(c>=0 && c<256) {
+//        IFX(X_ZF|X_CF|X_OF) {
+//            ADCS_IMM8(s1, s1, c);
+//        } else {
+//            ADC_IMM8(s1, s1, c);
+//        }
+//    } else {
+//        MOV32(s3, c);
+//        IFX(X_ZF|X_CF|X_OF) {
+//            ADCS_REG_LSL_IMM5(s1, s1, s3, 0);
+//        } else {
+//            ADC_REG_LSL_IMM5(s1, s1, s3, 0);
+//        }
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF) {
+//        if(c>=0 && c<256) {
+//            ORR_IMM8(s3, s4, c, 0);     // s3 = op1 | op2
+//            AND_IMM8(s4, s4, c);        // s4 = op1 & op2
+//        } else {
+//            ORR_REG_LSL_IMM5(s3, s3, s4, 0);    // s3 = op1 | op2
+//            PUSH(xSP, 1<<s3);
+//            MOVW(s3, c);
+//            AND_REG_LSL_IMM5(s4, s4, s3, 0);    // s4 = op1 & op2
+//            POP(xSP, 1<<s3);
+//        }
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        MOV_REG_LSR_IMM5(s4, s3, 3);
+//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//    }
+//    IFX(X_ZF|X_CF) {
+//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_CF) {
+//        ORR_IMM8_COND(cCS, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_OF) {
+//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
+//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit ADC8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, with save_s4 set if s4 needs to be saved
+//void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_adc8);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_PF){if(save_s4) {PUSH(xSP, 1<<s4);}}
+//    IFX(X_AF | X_OF) {
+//        MOV_REG(s4, s1);
+//    }
+//    MOVS_REG_LSR_IMM5(s3, xFlags, 1);    // load CC into ARM CF
+//    ADC_REG_LSL_IMM5(s1, s1, s2, 0);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF) {
+//        ORR_REG_LSL_IMM5(s3, s4, s2, 0);    // s3 = op1 | op2
+//        AND_REG_LSL_IMM5(s4, s4, s2, 0);    // s4 = op1 & op2
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//    IFX(X_CF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 8);
+//        BFI(xFlags, s3, F_CF, 1);
+//    }
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//    IFX(X_AF|X_OF|X_PF){if(save_s4) {POP(xSP, 1<<s4);}}
+//}
+
+// emit ADC8 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch
+//void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        MOVW(s3, c&0xff);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s4, d_adc8);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s4);
+//    }
+//    IFX(X_AF | X_OF) {
+//        MOV_REG(s4, s1);
+//    }
+//    MOVS_REG_LSR_IMM5(s3, xFlags, 1);    // load CC into ARM CF
+//    ADC_IMM8(s1, s1, c);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF) {
+//        ORR_IMM8(s3, s4, c, 0);     // s3 = op1 | op2
+//        AND_IMM8(s4, s4, c);        // s4 = op1 & op2
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//    IFX(X_CF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 8);
+//        BFI(xFlags, s3, F_CF, 1);
+//    }
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit ADC16 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, with save_s4 set if s4 needs to be saved
+//void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_adc16);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_PF){if(save_s4) {PUSH(xSP, 1<<s4);}}
+//    IFX(X_AF | X_OF) {
+//        MOV_REG(s4, s1);
+//    }
+//    MOVS_REG_LSR_IMM5(s3, xFlags, 1);    // load CC into ARM CF
+//    ADC_REG_LSL_IMM5(s1, s1, s2, 0);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF) {
+//        ORR_REG_LSL_IMM5(s3, s4, s2, 0);    // s3 = op1 | op2
+//        AND_REG_LSL_IMM5(s4, s4, s2, 0);    // s4 = op1 & op2
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_CF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 16);
+//        BFI(xFlags, s3, F_CF, 1);
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//    IFX(X_AF|X_OF|X_PF){if(save_s4) {POP(xSP, 1<<s4);}}
+//}
+
+// emit ADC16 instruction, from s1, const c, store result in s1 using s3 and s4 as scratch
+//void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        MOVW(s3, c);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_adc16);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF | X_OF) {
+//        MOV_REG(s4, s1);
+//    }
+//    MOVS_REG_LSR_IMM5(s3, xFlags, 1);    // load CC into ARM CF
+//    if(c>=0 && c<256) {
+//        ADC_IMM8(s1, s1, c);
+//    } else {
+//        MOVW(s3, c);
+//        ADC_REG_LSL_IMM5(s1, s1, s3, 0);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF) {
+//        if(c>=0 && c<256) {
+//            ORR_IMM8(s3, s4, c, 0);     // s3 = op1 | op2
+//            AND_IMM8(s4, s4, c);        // s4 = op1 & op2
+//        } else {
+//            ORR_REG_LSL_IMM5(s3, s3, s4, 0);    // s3 = op1 | op2
+//            PUSH(xSP, 1<<s3);
+//            MOVW(s3, c);
+//            AND_REG_LSL_IMM5(s4, s4, s3, 0);    // s4 = op1 & op2
+//            POP(xSP, 1<<s3);
+//        }
+//
+//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (op1 & op2) | ((op1 | op2) & ~ res)
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_CF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 16);
+//        BFI(xFlags, s3, F_CF, 1);
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit SBB32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
+//void emit_sbb32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_sbb32);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF) {
+//        MVN_REG_LSL_IMM5(s4, s1, 0);
+//    }
+//    XOR_IMM8(s3, xFlags, 1);                // invert CC: ARM carry means "no borrow" for SUB, x86 CF means "borrow"
+//    MOVS_REG_LSR_IMM5(s3, s3, 1);       // load into ARM CF
+//    IFX(X_ZF|X_CF|X_OF) {
+//        SBCS_REG_LSL_IMM5(s1, s1, s2, 0);
+//    } else {
+//        SBC_REG_LSL_IMM5(s1, s1, s2, 0);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF) {
+//        ORR_REG_LSL_IMM5(s3, s4, s2, 0);    // s3 = ~op1 | op2
+//        AND_REG_LSL_IMM5(s4, s2, s4, 0);    // s4 = ~op1 & op2
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        UBFX(s4, s3, 3, 1);
+//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//    }
+//    IFX(X_ZF|X_CF) {
+//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_CF) {
+//        // Inverted carry
+//        ORR_IMM8_COND(cCC, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_OF) {
+//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
+//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
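The XOR by 1 before the carry load exists because the two ISAs disagree on
subtraction carries: after an ARM SUBS/SBCS, C is set when there was no borrow
(op1 >= op2), while x86 sets CF when there was one, so CF_x86 == C_arm ^ 1 both
on the way in and on the way out (hence the "inverted carry" branches). In
short (illustrative):

    static inline int x86_cf_from_arm_c(int arm_c) { return arm_c ^ 1; }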
+
+// emit SBB32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
+//void emit_sbb32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        MOV32(s3, c);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s4, d_sbb32);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s4);
+//    }
+//    IFX(X_AF) {
+//        MVN_REG_LSL_IMM5(s4, s1, 0);
+//    }
+//    XOR_IMM8(s3, xFlags, 1);            // invert CC: ARM carry means "no borrow" for SUB, x86 CF means "borrow"
+//    MOVS_REG_LSR_IMM5(s3, s3, 1);       // load into ARM CF
+//    if(c>=0 && c<256) {
+//        IFX(X_ZF|X_CF|X_OF) {
+//            SBCS_IMM8(s1, s1, c);
+//        } else {
+//            SBC_IMM8(s1, s1, c);
+//        }
+//    } else {
+//        MOV32(s3, c);
+//        IFX(X_ZF|X_CF|X_OF) {
+//            SBCS_REG_LSL_IMM5(s1, s1, s3, 0);
+//        } else {
+//            SBC_REG_LSL_IMM5(s1, s1, s3, 0);
+//        }
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF) {
+//        if(c>=0 && c<256) {
+//            ORR_IMM8(s3, s4, c, 0);             // s3 = ~op1 | op2
+//            AND_IMM8(s4, s4, c);                // s4 = ~op1 & op2
+//        } else {
+//            ORR_REG_LSL_IMM5(s3, s4, s3, 0);    // s3 = ~op1 | op2 (s3 still holds c here)
+//            PUSH(xSP, 1<<s3);
+//            MOV32(s3, c);
+//            AND_REG_LSL_IMM5(s4, s3, s4, 0);    // s4 = ~op1 & op2
+//            POP(xSP, 1<<s3);
+//        }
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        UBFX(s4, s3, 3, 1);
+//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//    }
+//    IFX(X_ZF|X_CF) {
+//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_CF) {
+//        // Inverted carry
+//        ORR_IMM8_COND(cCC, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_OF) {
+//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
+//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
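+// Note: the c>=0 && c<256 tests above select the ARM32 8-bit immediate form
+// (SBC_IMM8) when the constant fits unrotated; larger constants get
+// materialized into a scratch register first.
+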
+// emit SBB8 instruction, from s1, s2, store result in s1, using s3 and s4 as scratch; save_s4 indicates whether s4 needs to be saved
+//void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_sbb8);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {PUSH(xSP, 1<<s4);}}
+//    IFX(X_AF|X_OF|X_CF) {
+//        MVN_REG_LSL_IMM5(s4, s1, 0);
+//    }
+//
+//    XOR_IMM8(s3, xFlags, 1);            // invert x86 CF: ARM carry is inverted for SUB/SBC
+//    MOVS_REG_LSR_IMM5(s3, s3, 1);       // shift it out into the ARM carry flag
+//    SBC_REG_LSL_IMM5(s1, s1, s2, 0);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        ORR_REG_LSL_IMM5(s3, s4, s2, 0);    // s3 = ~op1 | op2
+//        AND_REG_LSL_IMM5(s4, s2, s4, 0);    // s4 = ~op1 & op2
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        IFX(X_CF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 7);
+//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
+//        }
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {POP(xSP, 1<<s4);}}
+//}
+
+// emit SBB8 instruction, from s1, constant c, store result in s1, using s3 and s4 as scratch
+//void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        MOVW(s3, c&0xff);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_sbb8);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        MVN_REG_LSL_IMM5(s4, s1, 0);
+//    }
+//    XOR_IMM8(s3, xFlags, 1);            // invert x86 CF: ARM carry is inverted for SUB/SBC
+//    MOVS_REG_LSR_IMM5(s3, s3, 1);       // shift it out into the ARM carry flag
+//    SBC_IMM8(s1, s1, c);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        ORR_IMM8(s3, s4, c, 0);             // s3 = ~op1 | op2
+//        AND_IMM8(s4, s4, c);                // s4 = ~op1 & op2
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);    // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);    // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        IFX(X_CF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 7);
+//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
+//        }
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit SBB16 instruction, from s1, s2, store result in s1, using s3 and s4 as scratch; save_s4 indicates whether s4 needs to be saved
+//void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_sbb16);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {PUSH(xSP, 1<<s4);}}
+//    IFX(X_AF|X_OF|X_CF) {
+//        MVN_REG_LSL_IMM5(s4, s1, 0);
+//    }
+//
+//    XOR_IMM8(s3, xFlags, 1);            // invert x86 CF: ARM carry is inverted for SUB/SBC
+//    MOVS_REG_LSR_IMM5(s3, s3, 1);       // shift it out into the ARM carry flag
+//    SBC_REG_LSL_IMM5(s1, s1, s2, 0);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        ORR_REG_LSL_IMM5(s3, s4, s2, 0);    // s3 = ~op1 | op2
+//        AND_REG_LSL_IMM5(s4, s2, s4, 0);    // s4 = ~op1 & op2
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        IFX(X_CF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 15);
+//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x8000
+//        }
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {POP(xSP, 1<<s4);}}
+//}
+
+// emit SBB16 instruction, from s1, constant c, store result in s1, using s3 and s4 as scratch
+//void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        MOVW(s3, c);
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
+//        SET_DF(s3, d_sbb16);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        MVN_REG_LSL_IMM5(s4, s1, 0);
+//    }
+//    XOR_IMM8(s3, xFlags, 1);            // invert x86 CF: ARM carry is inverted for SUB/SBC
+//    MOVS_REG_LSR_IMM5(s3, s3, 1);       // shift it out into the ARM carry flag
+//    if(c>=0 && c<256) {
+//        SBC_IMM8(s1, s1, c);
+//    } else {
+//        MOVW(s3, c);
+//        SBC_REG_LSL_IMM5(s1, s1, s3, 0);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF|X_CF) {
+//        if(c>=0 && c<256) {
+//            ORR_IMM8(s3, s4, c, 0);     // s3 = ~op1 | op2
+//            AND_IMM8(s4, s4, c);        // s4 = ~op1 & op2
+//        } else {
+//            ORR_REG_LSL_IMM5(s3, s3, s4, 0);    // s3 = ~op1 | op2
+//            PUSH(xSP, 1<<s3);
+//            MOVW(s3, c);
+//            AND_REG_LSL_IMM5(s4, s4, s3, 0);    // s4 = ~op1 & op2
+//            POP(xSP, 1<<s3);
+//        }
+//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
+//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+//        IFX(X_CF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 15);
+//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x8000
+//        }
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit NEG32 instruction, from s1, store result in s1 using s3 and s4 as scratch
+//void emit_neg32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        SET_DF(s3, d_neg32);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_ZF|X_CF) {
+//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
+//    }
+//    IFX(X_CF) {
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cNE, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_AF) {
+//        MOV_REG_LSL_IMM5(s3, s1, 0);
+//    }
+//    IFX(X_ZF|X_OF) {
+//        RSBS_IMM8(s1, s1, 0);
+//    } else {
+//        RSB_IMM8(s1, s1, 0);
+//    }
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_ZF) {
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_OF) {
+//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
+//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
+//    }
+//    IFX(X_AF) {
+//        ORR_REG_LSL_IMM5(s3, s3, s1, 0);                        // bc = op1 | res
+//        MOV_REG_LSR_IMM5(s4, s3, 3);
+//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 31);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
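+// Reference sketch (not emitted code): the x86 NEG semantics reproduced above,
+// for the 32-bit case (16/8-bit variants only change bit positions). The
+// borrow chain of 0 - op1 simplifies to op1 | res:
+//
+//    uint32_t res = 0u - op1;
+//    uint32_t bc  = op1 | res;
+//    int cf = (op1 != 0);               // NEG sets CF iff the source is non-zero
+//    int af = (bc >> 3) & 1;
+//    int of = (op1 == 0x80000000u);     // only INT32_MIN overflows
+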
+// emit NEG16 instruction, from s1, store result in s1 using s3 and s4 as scratch
+//void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        SET_DF(s3, d_neg16);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_ZF|X_CF) {
+//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
+//    }
+//    IFX(X_CF) {
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cNE, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_AF|X_OF) {
+//        MOV_REG_LSL_IMM5(s3, s1, 0);
+//    }
+//    RSB_IMM8(s1, s1, 0);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF) {
+//        ORR_REG_LSL_IMM5(s3, s3, s1, 0);                        // bc = op1 | res
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 14);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        UXTH(s1, s1, 0);
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 15);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
+
+// emit NEG8 instruction, from s1, store result in s1 using s3 and s4 as scratch
+//void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
+//{
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
+//        SET_DF(s3, d_neg8);
+//    } else IFX(X_ALL) {
+//        SET_DFNONE(s3);
+//    }
+//    IFX(X_ZF|X_CF) {
+//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
+//    }
+//    IFX(X_CF) {
+//        TSTS_REG_LSL_IMM5(s1, s1, 0);
+//        ORR_IMM8_COND(cNE, xFlags, xFlags, 1<<F_CF, 0);
+//    }
+//    IFX(X_AF|X_OF) {
+//        MOV_REG_LSL_IMM5(s3, s1, 0);
+//    }
+//    RSB_IMM8(s1, s1, 0);
+//    IFX(X_PEND) {
+//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
+//    }
+//    IFX(X_AF|X_OF) {
+//        ORR_REG_LSL_IMM5(s3, s3, s1, 0);                        // bc = op1 | res
+//        IFX(X_AF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 3);
+//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+//        }
+//        IFX(X_OF) {
+//            MOV_REG_LSR_IMM5(s4, s3, 6);
+//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
+//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+//        }
+//    }
+//    IFX(X_ZF) {
+//        ANDS_IMM8(s1, s1, 0xff);
+//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
+//    }
+//    IFX(X_SF) {
+//        MOV_REG_LSR_IMM5(s3, s1, 7);
+//        BFI(xFlags, s3, F_SF, 1);
+//    }
+//    IFX(X_PF) {
+//        emit_pf(dyn, ninst, s1, s3, s4);
+//    }
+//}
\ No newline at end of file
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index 6272506e..a9068b48 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -31,19 +31,27 @@
 // GETGD    get x64 register in gd
 #define GETGD   gd = xRAX+((nextop&0x38)>>3)+(rex.r<<3)
 //GETED can use x1 for ed, and x2 for wback. wback is 0 if ed is xRAX..xRDI
+#define GETED(D)  if(MODREG) {                          \
+                    ed = xRAX+(nextop&7)+(rex.b<<3);    \
+                    wback = 0;                          \
+                } else {                                \
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, rex, 0, D); \
+                    LDRxw_U12(x1, wback, fixedaddress); \
+                    ed = x1;                            \
+                }
 #define GETEDx(D)  if(MODREG) {                         \
                     ed = xRAX+(nextop&7)+(rex.b<<3);    \
                     wback = 0;                          \
                 } else {                                \
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, 0, D); \
-                    LDRxw_U12(rex.w, x1, wback, fixedaddress); \
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, rex, 0, D); \
+                    LDRx_U12(x1, wback, fixedaddress);  \
                     ed = x1;                            \
                 }
 #define GETEDw(D)  if((nextop&0xC0)==0xC0) {            \
                     ed = xEAX+(nextop&7)+(rex.b<<3);    \
                     wback = 0;                          \
                 } else {                                \
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, 0, D); \
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, 0xfff, 0, rex, 0, D); \
                     LDRw_U12(x1, wback, fixedaddress);  \
                     ed = x1;                            \
                 }
@@ -67,7 +75,11 @@
                     LDR_IMM9(ed, wback, fixedaddress); \
                 }
 // Write back ed in wback (if wback not 0)
-#define WBACK       if(wback) {STR_IMM9(ed, wback, fixedaddress);}
+#define WBACK       if(wback) {STRxw_U12(ed, wback, fixedaddress);}
+// Write back ed in wback as 64-bit (if wback not 0)
+#define WBACKx      if(wback) {STRx_U12(ed, wback, fixedaddress);}
+// Write back ed in wback as 32-bit (if wback not 0)
+#define WBACKw      if(wback) {STRw_U12(ed, wback, fixedaddress);}
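+// Typical use in an opcode handler (illustrative sketch; nextop/F8 are the
+// usual ModRM fetch used throughout the dynarec):
+//      nextop = F8;
+//      GETGD;                  // gd = x86_64 register operand
+//      GETED(0);               // ed = host register, or x1 loaded from [wback]
+//      SUBxw_REG(ed, ed, gd);  // any rex.w-aware operation
+//      WBACK;                  // store ed back to memory if it was loaded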
 // Send back wb to either ed or wback
 #define SBACK(wb)   if(wback) {STR_IMM9(wb, wback, fixedaddress);} else {MOV_REG(ed, wb);}
 //GETEDO can use r1 for ed, and r2 for wback. wback is 0 if ed is xEAX..xEDI
@@ -220,35 +232,35 @@
 
 // Branch to MARK if cond (use j32)
 #define B_MARK(cond)    \
-    j32 = GETMARK-(dyn->arm_size+8);    \
+    j32 = GETMARK-(dyn->arm_size);    \
     Bcond(cond, j32)
 // Branch to MARK2 if cond (use j32)
 #define B_MARK2(cond)    \
-    j32 = GETMARK2-(dyn->arm_size+8);   \
+    j32 = GETMARK2-(dyn->arm_size);   \
     Bcond(cond, j32)
 // Branch to MARK3 if cond (use j32)
 #define B_MARK3(cond)    \
-    j32 = GETMARK3-(dyn->arm_size+8);   \
+    j32 = GETMARK3-(dyn->arm_size);   \
     Bcond(cond, j32)
 // Branch to next instruction if cond (use j32)
 #define B_NEXT(cond)     \
-    j32 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size+8)):0; \
+    j32 = (dyn->insts)?(dyn->insts[ninst].epilog-(dyn->arm_size)):0; \
     Bcond(cond, j32)
 // Branch to MARKSEG if cond (use j32)
 #define B_MARKSEG(cond)    \
-    j32 = GETMARKSEG-(dyn->arm_size+8);   \
+    j32 = GETMARKSEG-(dyn->arm_size);   \
     Bcond(cond, j32)
 // Branch to MARKSEG if reg is 0 (use j32)
 #define CBZx_MARKSEG(reg)    \
-    j32 = GETMARKSEG-(dyn->arm_size+8);   \
+    j32 = GETMARKSEG-(dyn->arm_size);   \
     CBZx(reg, j32)
 // Branch to MARKSEG if reg is 0 (use j32)
 #define CBZw_MARKSEG(reg)    \
-    j32 = GETMARKSEG-(dyn->arm_size+8);   \
+    j32 = GETMARKSEG-(dyn->arm_size);   \
     CBZw(reg, j32)
 // Branch to MARKLOCK if cond (use j32)
 #define B_MARKLOCK(cond)    \
-    j32 = GETMARKLOCK-(dyn->arm_size+8);   \
+    j32 = GETMARKLOCK-(dyn->arm_size);   \
     Bcond(cond, j32)
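+// Note: the +8 bias of the ARM32 version is gone because an ARM32 branch
+// encodes (target - PC) with PC reading as the branch address + 8, whereas an
+// AArch64 B.cond/CBZ imm19 is relative to the branch instruction itself; the
+// raw (mark - arm_size) byte delta is thus encoded directly (scaled by 4).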
 
 #define IFX(A)  if(dyn->insts && (dyn->insts[ninst].x64.need_flags&(A)))
@@ -319,12 +331,11 @@
 #define READFLAGS(A) \
     if(((A)!=X_PEND) && dyn->state_flags!=SF_SET) {     \
         if(dyn->state_flags!=SF_PENDING) {              \
-            LDR_IMM9(x3, xEmu, offsetof(x64emu_t, df)); \
-            TSTS_REG_LSL_IMM5(x3, x3, 0);               \
-            j32 = (GETMARKF)-(dyn->arm_size+8);         \
-            Bcond(cEQ, j32);                            \
+            LDRw_U12(x3, xEmu, offsetof(x64emu_t, df)); \
+            j32 = (GETMARKF)-(dyn->arm_size);           \
+            CBZw(x3, j32);                              \
         }                                               \
-        CALL_(UpdateFlags, -1, 0);                      \
+        CALL_(UpdateFlags, -1);                         \
         MARKF;                                          \
         dyn->state_flags = SF_SET;                      \
         SET_DFOK();                                     \
@@ -534,7 +545,7 @@ void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int save
 //void emit_add32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
 //void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
-//void emit_sub32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 //void emit_sub32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
 //void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);