about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2021-03-19 09:49:03 +0100
committerptitSeb <sebastien.chev@gmail.com>2021-03-19 09:49:03 +0100
commitaf1b5a9a51d9a08a75993df4f354e44f4bfe75bb (patch)
treeb26afc1ca993d7f02de4d59babab87788437f4ef /src
parentf5a7e639360032fa6958fd405635eb84aa937f5b (diff)
downloadbox64-af1b5a9a51d9a08a75993df4f354e44f4bfe75bb.tar.gz
box64-af1b5a9a51d9a08a75993df4f354e44f4bfe75bb.zip
[DYNAREC] Added 80 opcodes
Diffstat (limited to 'src')
-rwxr-xr-xsrc/dynarec/arm64_emitter.h25
-rwxr-xr-xsrc/dynarec/dynarec_arm64_00.c75
-rwxr-xr-xsrc/dynarec/dynarec_arm64_emit_logic.c201
-rwxr-xr-xsrc/dynarec/dynarec_arm64_emit_math.c410
-rwxr-xr-xsrc/dynarec/dynarec_arm64_emit_tests.c162
-rwxr-xr-xsrc/dynarec/dynarec_arm64_helper.h51
6 files changed, 508 insertions, 416 deletions
diff --git a/src/dynarec/arm64_emitter.h b/src/dynarec/arm64_emitter.h
index 746135fc..bbf5d420 100755
--- a/src/dynarec/arm64_emitter.h
+++ b/src/dynarec/arm64_emitter.h
@@ -85,6 +85,7 @@
 #define cLE 0b1101
 #define c__ 0b1110
 
+#define invCond(cond)   ((cond)^0b0001)
 
 // MOVZ
 #define MOVZ_gen(sf, hw, imm16, Rd)         ((sf)<<31 | 0b10<<29 | 0b100101<<23 | (hw)<<21 | (imm16)<<5 | (Rd))
@@ -162,12 +163,22 @@
 #define CMPSw_U12(Rn, imm12)        SUBSw_U12(wZR, Rn, imm12)
 #define CMPSxw_U12(Rn, imm12)       SUBSxw_U12(xZR, Rn, imm12)
 
+#define ADDSUBC_gen(sf, op, S, Rm, Rn, Rd)  ((sf)<<31 | (op)<<30 | (S)<<29 | 0b11010000<<21 | (Rm)<<16 | (Rn)<<5 | (Rd))
+#define ADCx_REG(Rd, Rn, Rm)        EMIT(ADDSUBC_gen(1, 0, 0, Rm, Rn, Rd))
+#define ADCw_REG(Rd, Rn, Rm)        EMIT(ADDSUBC_gen(0, 0, 0, Rm, Rn, Rd))
+#define ADCxw_REG(Rd, Rn, Rm)       EMIT(ADDSUBC_gen(rex.w, 0, 0, Rm, Rn, Rd))
+#define SBCx_REG(Rd, Rn, Rm)        EMIT(ADDSUBC_gen(1, 1, 0, Rm, Rn, Rd))
+#define SBCw_REG(Rd, Rn, Rm)        EMIT(ADDSUBC_gen(0, 1, 0, Rm, Rn, Rd))
+#define SBCxw_REG(Rd, Rn, Rm)       EMIT(ADDSUBC_gen(rex.w, 1, 0, Rm, Rn, Rd))
+
 // LDR
 #define LDR_gen(size, op1, imm9, op2, Rn, Rt)    ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm9)<<12 | (op2)<<10 | (Rn)<<5 | (Rt))
 #define LDRx_S9_postindex(Rt, Rn, imm9)   EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
 #define LDRx_S9_preindex(Rt, Rn, imm9)    EMIT(LDR_gen(0b11, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
 #define LDRw_S9_postindex(Rt, Rn, imm9)   EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
 #define LDRw_S9_preindex(Rt, Rn, imm9)    EMIT(LDR_gen(0b10, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
+#define LDRB_S9_postindex(Rt, Rn, imm9)   EMIT(LDR_gen(0b00, 0b00, (imm9)&0x1ff, 0b01, Rn, Rt))
+#define LDRB_S9_preindex(Rt, Rn, imm9)    EMIT(LDR_gen(0b00, 0b00, (imm9)&0x1ff, 0b11, Rn, Rt))
 
 #define LD_gen(size, op1, imm12, Rn, Rt)        ((size)<<30 | 0b111<<27 | (op1)<<24 | 0b01<<22 | (imm12)<<10 | (Rn)<<5 | (Rt))
 #define LDRx_U12(Rt, Rn, imm12)           EMIT(LD_gen(0b11, 0b01, ((uint32_t)(imm12>>3))&0xfff, Rn, Rt))
@@ -261,6 +272,14 @@
 
 #define NOP                             EMIT(0b11010101000000110010000000011111)
 
+#define CSINC_gen(sf, Rm, cond, Rn, Rd)     ((sf)<<31 | 0b11010100<<21 | (Rm)<<16 | (cond)<<12 | 1<<10 | (Rn)<<5 | (Rd))
+#define CSINCx(Rd, Rn, Rm, cond)            EMIT(CSINC_gen(1, Rm, cond, Rn, Rd))
+#define CSINCw(Rd, Rn, Rm, cond)            EMIT(CSINC_gen(0, Rm, cond, Rn, Rd))
+#define CSINCxw(Rd, Rn, Rm, cond)           EMIT(CSINC_gen(rex.w, Rm, cond, Rn, Rd))
+#define CSETx(Rd, cond)                     CSINCx(Rd, xZR, xZR, invCond(cond))
+#define CSETw(Rd, cond)                     CSINCw(Rd, xZR, xZR, invCond(cond))
+#define CSETxw(Rd, cond)                    CSINCxw(Rd, xZR, xZR, invCond(cond))
+
 // AND / ORR
 #define LOGIC_gen(sf, opc, N, immr, imms, Rn, Rd)  ((sf)<<31 | (opc)<<29 | 0b100100<<23 | (N)<<22 | (immr)<<16 | (imms)<<10 | (Rn)<<5 | Rd)
 // logic to get the mask is ... convoluted... list of possible value there: https://gist.github.com/dinfuehr/51a01ac58c0b23e4de9aac313ed6a06a
@@ -395,8 +414,10 @@
 // MRS
 #define MRS_gen(L, o0, op1, CRn, CRm, op2, Rt)  (0b1101010100<<22 | (L)<<21 | 1<<20 | (o0)<<19 | (op1)<<16 | (CRn)<<12 | (CRm)<<8 | (op2)<<5 | (Rt))
 // mrs    x0, nzcv : 1101010100 1 1 1 011 0100 0010 000 00000    o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0
-#define VMRS_nzvc(Rt)                   EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt))
-#define VMSR_nzvc(Rt)                   EMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt))
+// MRS : from System register
+#define MRS_nzvc(Rt)                    EMIT(MRS_gen(1, 1, 3, 4, 2, 0, Rt))
+// MSR : to System register
+#define MSR_nzvc(Rt)                    EMIT(MRS_gen(0, 1, 3, 4, 2, 0, Rt))
 // mrs    x0, fpcr : 1101010100 1 1 1 011 0100 0100 000 00000    o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=2
 #define VMRS(Rt)                        EMIT(MRS_gen(1, 1, 3, 4, 4, 0, Rt))
 #define VMSR(Rt)                        EMIT(MRS_gen(0, 1, 3, 4, 4, 0, Rt))
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index ddb6538a..b8c34ca8 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -369,6 +369,81 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;
         #undef GO
         
+        case 0x80:
+            nextop = F8;
+            switch((nextop>>3)&7) {
+                case 0: //ADD
+                    INST_NAME("ADD Eb, Ib");
+                    SETFLAGS(X_ALL, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    emit_add8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    EBBACK;
+                    break;
+                case 1: //OR
+                    INST_NAME("OR Eb, Ib");
+                    SETFLAGS(X_ALL, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    emit_or8c(dyn, ninst, x1, u8, x2, x4);
+                    EBBACK;
+                    break;
+                case 2: //ADC
+                    INST_NAME("ADC Eb, Ib");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_ALL, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    emit_adc8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    EBBACK;
+                    break;
+                case 3: //SBB
+                    INST_NAME("SBB Eb, Ib");
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_ALL, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    emit_sbb8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    EBBACK;
+                    break;
+                case 4: //AND
+                    INST_NAME("AND Eb, Ib");
+                    SETFLAGS(X_ALL, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    emit_and8c(dyn, ninst, x1, u8, x2, x4);
+                    EBBACK;
+                    break;
+                case 5: //SUB
+                    INST_NAME("SUB Eb, Ib");
+                    SETFLAGS(X_ALL, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    EBBACK;
+                    break;
+                case 6: //XOR
+                    INST_NAME("XOR Eb, Ib");
+                    SETFLAGS(X_ALL, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    emit_xor8c(dyn, ninst, x1, u8, x2, x4);
+                    EBBACK;
+                    break;
+                case 7: //CMP
+                    INST_NAME("CMP Eb, Ib");
+                    SETFLAGS(X_ALL, SF_SET);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    if(u8) {
+                        MOV32w(x2, u8);
+                        emit_cmp8(dyn, ninst, x1, x2, x3, x4, x5);
+                    } else {
+                        emit_cmp8_0(dyn, ninst, x1, x3, x4);
+                    }
+                    break;
+            }
+            break;
         case 0x81:
         case 0x83:
             nextop = F8;
diff --git a/src/dynarec/dynarec_arm64_emit_logic.c b/src/dynarec/dynarec_arm64_emit_logic.c
index bc529251..682400be 100755
--- a/src/dynarec/dynarec_arm64_emit_logic.c
+++ b/src/dynarec/dynarec_arm64_emit_logic.c
@@ -264,41 +264,37 @@ void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
 //}
 
 // emit OR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
-//void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
-//{
-//    IFX(X_PEND) {
-//        MOV32(s3, c&0xff);
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s4, d_or8);
-//    } else IFX(X_ALL) {
-//        SET_DFNONE(s4);
-//    }
-//    IFX(X_ALL) {
-//        ORRS_IMM8(s1, s1, c, 0);
-//    } else {
-//        ORR_IMM8(s1, s1, c, 0);
-//    }
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_CF | X_AF | X_ZF) {
-//        BIC_IMM8(xFlags, xFlags, (1<<F_CF)|(1<<F_AF)|(1<<F_ZF), 0);
-//    }
-//    IFX(X_OF) {
-//        BIC_IMM8(xFlags, xFlags, 0b10, 0x0b);
-//    }
-//    IFX(X_ZF) {
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 7);
-//        BFI(xFlags, s3, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//}
+void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
+{
+    MOV32w(s3, c&0xff);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_or8);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    ORRw_REG(s1, s1, s3);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_CF | X_AF | X_ZF | X_OF) {
+        MOV32w(s3, (1<<F_ZF)|(1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+        BICw(xFlags, xFlags, s3);
+    }
+    IFX(X_ZF) {
+        TSTw_REG(s1, s1);
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
+    }
+    IFX(X_SF) {
+        LSRw(s3, s1, 7);
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 // emit XOR8 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed)
 //void emit_xor8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
@@ -337,41 +333,37 @@ void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
 //}
 
 // emit XOR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
-//void emit_xor8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
-//{
-//    IFX(X_PEND) {
-//        MOV32(s3, c&0xff);
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s4, d_xor8);
-//    } else IFX(X_ALL) {
-//        SET_DFNONE(s4);
-//    }
-//    IFX(X_ALL) {
-//        XORS_IMM8(s1, s1, c);
-//    } else {
-//        XOR_IMM8(s1, s1, c);
-//    }
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_CF | X_AF | X_ZF) {
-//        BIC_IMM8(xFlags, xFlags, (1<<F_CF)|(1<<F_AF)|(1<<F_ZF), 0);
-//    }
-//    IFX(X_OF) {
-//        BIC_IMM8(xFlags, xFlags, 0b10, 0x0b);
-//    }
-//    IFX(X_ZF) {
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 7);
-//        BFI(xFlags, s3, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//}
+void emit_xor8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
+{
+    MOV32w(s3, c&0xff);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_xor8);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    EORw_REG(s1, s1, s3);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_CF | X_AF | X_ZF | X_OF) {
+        MOV32w(s3, (1<<F_ZF)|(1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+        BICw(xFlags, xFlags, s3);
+    }
+    IFX(X_ZF) {
+        TSTw_REG(s1, s1);
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
+    }
+    IFX(X_SF) {
+        LSRw(s3, s1, 7);
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 // emit AND8 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed)
 //void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
@@ -410,41 +402,40 @@ void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
 //}
 
 // emit AND8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
-//void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
-//{
-//    IFX(X_PEND) {
-//        MOV32(s3, c&0xff);
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s4, d_and8);
-//    } else IFX(X_ALL) {
-//        SET_DFNONE(s4);
-//    }
-//    IFX(X_ALL) {
-//        ANDS_IMM8(s1, s1, c);
-//    } else {
-//        AND_IMM8(s1, s1, c);
-//    }
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_CF | X_AF | X_ZF) {
-//        BIC_IMM8(xFlags, xFlags, (1<<F_CF)|(1<<F_AF)|(1<<F_ZF), 0);
-//    }
-//    IFX(X_OF) {
-//        BIC_IMM8(xFlags, xFlags, 0b10, 0x0b);
-//    }
-//    IFX(X_ZF) {
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 7);
-//        BFI(xFlags, s3, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//}
+void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
+{
+    MOV32w(s3, c&0xff);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_and8);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    IFX(X_ZF) {
+        ANDSw_REG(s1, s1, s3);
+    } else {
+        ANDw_REG(s1, s1, s3);
+    }
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_CF | X_AF | X_ZF | X_OF) {
+        MOV32w(s3, (1<<F_ZF)|(1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+        BICw(xFlags, xFlags, s3);
+    }
+    IFX(X_ZF) {
+        Bcond(cNE, +8);
+        ORRw_mask(xFlags, xFlags, 0b011010, 0); // mask=0x40
+    }
+    IFX(X_SF) {
+        LSRw(s3, s1, 7);
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 
 // emit OR16 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed)
diff --git a/src/dynarec/dynarec_arm64_emit_math.c b/src/dynarec/dynarec_arm64_emit_math.c
index fcd2a73c..34232aba 100755
--- a/src/dynarec/dynarec_arm64_emit_math.c
+++ b/src/dynarec/dynarec_arm64_emit_math.c
@@ -334,55 +334,56 @@ void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
 //}
 
 // emit ADD8 instruction, from s1 , const c, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved
-//void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
-//{
-//    IFX(X_PEND) {
-//        MOVW(s3, c&0xff);
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s3, d_add8);
-//    } else IFX(X_ALL) {
-//        SET_DFNONE(s3);
-//    }
-//    IFX(X_AF | X_OF) {
-//        ORR_IMM8(s3, s1, c, 0);     // s3 = op1 | op2
-//        AND_IMM8(s4, s1, c);        // s4 = op1 & op2
-//    }
-//    ADD_IMM8(s1, s1, c);
-//
-//    IFX(X_AF|X_OF) {
-//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
-//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s4 = (op1 & op2) | ((op1 | op2) & ~ res)
-//        IFX(X_AF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 3);
-//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
-//        }
-//        IFX(X_OF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 6);
-//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
-//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
-//        }
-//    }
-//    IFX(X_CF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 8);
-//        BFI(xFlags, s3, F_CF, 1);
-//    }
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_ZF) {
-//        ANDS_IMM8(s1, s1, 0xff);
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 7);
-//        BFI(xFlags, s3, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//}
+void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5)
+{
+    IFX(X_PEND) {
+        MOV32w(s5, c&0xff);
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_add8);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s3);
+    }
+    IFX(X_AF | X_OF) {
+        if(X_PEND) {} else {MOV32w(s5, c&0xff);}
+        ORRw_REG(s3, s1, s5);       // s3 = op1 | op2
+        ANDw_REG(s4, s1, s5);       // s4 = op1 & op2
+    }
+    ADDw_U12(s1, s1, c);
+
+    IFX(X_AF|X_OF) {
+        BICw_REG(s3, s3, s1);   // s3 = (op1 | op2) & ~ res
+        ORRw_REG(s3, s3, s4);   // s4 = (op1 & op2) | ((op1 | op2) & ~ res)
+        IFX(X_AF) {
+            LSRw(s4, s3, 3);
+            BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+        }
+        IFX(X_OF) {
+            LSRw(s4, s3, 6);
+            EORw_REG_LSR(s4, s4, s4, 1);
+            BFIw(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+        }
+    }
+    IFX(X_CF) {
+        LSRw(s3, s1, 8);
+        BFIw(xFlags, s3, F_CF, 1);
+    }
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        ANDSw_mask(s1, s1, 0, 0b000111);    //mask=000000ff
+        CSETw(s3, cEQ);
+        BFIw(xFlags, s3, F_ZF, 1);
+    }
+    IFX(X_SF) {
+        LSRw(s3, s1, 7);
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 // emit SUB8 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved
 //void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
@@ -438,56 +439,61 @@ void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
 //}
 
 // emit SUB8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
-//void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
-//{
-//    IFX(X_PEND) {
-//        MOVW(s3, c&0xff);
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s3, d_sub8);
-//    } else IFX(X_ALL) {
-//        SET_DFNONE(s3);
-//    }
-//    IFX(X_AF|X_OF|X_CF) {
-//        MVN_REG_LSL_IMM5(s3, s1, 0);
-//        MOVW(s4, c&0xff);
-//        ORR_IMM8(s3, s3, c, 0);             // s3 = ~op1 | op2
-//        BIC_REG_LSL_IMM5(s4, s4, s1, 0);    // s4 = ~op1 & op2
-//    }
-//    SUB_IMM8(s1, s1, c);
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_AF|X_OF|X_CF) {
-//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
-//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
-//        IFX(X_CF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 7);
-//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
-//        }
-//        IFX(X_AF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 3);
-//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
-//        }
-//        IFX(X_OF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 6);
-//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
-//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
-//        }
-//    }
-//    IFX(X_ZF) {
-//        ANDS_IMM8(s1, s1, 0xff);
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 7);
-//        BFI(xFlags, s3, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//}
+void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5)
+{
+    IFX(X_ALL|X_PEND) {
+        MOV32w(s5, c&0xff);
+    }
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_sub8);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s3);
+    }
+    IFX(X_AF|X_OF|X_CF) {
+        MVNw_REG(s3, s1);
+        ORRw_REG(s3, s3, s5);    // s3 = ~op1 | op2
+        BICw_REG(s4, s5, s1);    // s4 = ~op1 & op2
+    }
+    IFX(X_ALL) {
+        SUBw_REG(s1, s1, s5);
+    } else {
+        SUBw_U12(s1, s1, c&0xff);
+    }
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_AF|X_OF|X_CF) {
+        ANDw_REG(s3, s3, s1);   // s3 = (~op1 | op2) & res
+        ORRw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+        IFX(X_CF) {
+            LSRw(s4, s3, 7);
+            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
+        }
+        IFX(X_AF) {
+            LSRw(s4, s3, 3);
+            BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+        }
+        IFX(X_OF) {
+            LSRw(s4, s3, 6);
+            EORw_REG_LSR(s4, s4, s4, 1);
+            BFIw(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+        }
+    }
+    IFX(X_ZF) {
+        ANDSw_mask(s1, s1, 0, 0b000111);    //mask=000000ff
+        CSETw(s3, cEQ);
+        BFIw(xFlags, s3, F_ZF, 1);
+    }
+    IFX(X_SF) {
+        LSRw(s3, s1, 7);
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 // emit ADD16 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved
 //void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
@@ -1193,56 +1199,58 @@ void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
 //}
 
 // emit ADC8 instruction, from s1 , const c, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved
-//void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
-//{
-//    IFX(X_PEND) {
-//        MOVW(s3, c&0xff);
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s4, d_adc8);
-//    } else IFX(X_ALL) {
-//        SET_DFNONE(s4);
-//    }
-//    IFX(X_AF | X_OF) {
-//        MOV_REG(s4, s1);
-//    }
-//    MOVS_REG_LSR_IMM5(s3, xFlags, 1);    // load CC into ARM CF
-//    ADC_IMM8(s1, s1, c);
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_AF|X_OF) {
-//        ORR_IMM8(s3, s4, c, 0);     // s3 = op1 | op2
-//        AND_IMM8(s4, s4, c);        // s4 = op1 & op2
-//        BIC_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (op1 | op2) & ~ res
-//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s4 = (op1 & op2) | ((op1 | op2) & ~ res)
-//        IFX(X_AF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 3);
-//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
-//        }
-//        IFX(X_OF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 6);
-//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
-//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
-//        }
-//    }
-//    IFX(X_CF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 8);
-//        BFI(xFlags, s3, F_CF, 1);
-//    }
-//    IFX(X_ZF) {
-//        ANDS_IMM8(s1, s1, 0xff);
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 7);
-//        BFI(xFlags, s3, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//}
+void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5)
+{
+    MOV32w(s5, c&0xff);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRB_U12(s5, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_adc8);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s4);
+    }
+    IFX(X_AF | X_OF) {
+        MOVw_REG(s4, s1);
+    }
+    MRS_nzvc(s3);
+    BFIx(s3, xFlags, 0, 1); // set C
+    MSR_nzvc(s3);      // load CC into ARM CF
+    ADCw_REG(s1, s1, s5);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_AF|X_OF) {
+        ORRw_REG(s3, s4, s5);        // s3 = op1 | op2
+        ANDw_REG(s4, s4, s5);        // s4 = op1 & op2
+        BICw_REG(s3, s3, s1);   // s3 = (op1 | op2) & ~ res
+        ORRw_REG(s3, s3, s4);   // s4 = (op1 & op2) | ((op1 | op2) & ~ res)
+        IFX(X_AF) {
+            LSRw(s4, s3, 3);
+            BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+        }
+        IFX(X_OF) {
+            LSRw(s4, s3, 6);
+            EORw_REG_LSR(s4, s4, s4, 1);
+            BFIw(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+        }
+    }
+    IFX(X_CF) {
+        LSRw(s3, s1, 8);
+        BFIw(xFlags, s3, F_CF, 1);
+    }
+    IFX(X_ZF) {
+        ANDSw_mask(s1, s1, 0, 0b000111);    //mask=000000ff
+        CSETw(s3, cEQ);
+        BFIw(xFlags, s3, F_ZF, 1);
+    }
+    IFX(X_SF) {
+        LSRw(s3, s1, 7);
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 // emit ADC16 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved
 //void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
@@ -1547,57 +1555,59 @@ void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
 //}
 
 // emit SBB8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
-//void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
-//{
-//    IFX(X_PEND) {
-//        MOVW(s3, c&0xff);
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s3, d_sbb8);
-//    } else IFX(X_ALL) {
-//        SET_DFNONE(s3);
-//    }
-//    IFX(X_AF|X_OF|X_CF) {
-//        MVN_REG_LSL_IMM5(s4, s1, 0);
-//    }
-//    XOR_IMM8(s3, xFlags, 1);            // invert CC because it's reverted for SUB on ARM
-//    MOVS_REG_LSR_IMM5(s3, s3, 1);       // load into ARM CF
-//    SBC_IMM8(s1, s1, c);
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_AF|X_OF|X_CF) {
-//        ORR_IMM8(s3, s4, c, 0);             // s3 = ~op1 | op2
-//        AND_IMM8(s4, s4, c);                // s4 = ~op1 & op2
-//        AND_REG_LSL_IMM5(s3, s3, s1, 0);    // s3 = (~op1 | op2) & res
-//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);    // s3 = (~op1 & op2) | ((~op1 | op2) & res)
-//        IFX(X_CF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 7);
-//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
-//        }
-//        IFX(X_AF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 3);
-//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
-//        }
-//        IFX(X_OF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 6);
-//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
-//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
-//        }
-//    }
-//    IFX(X_ZF) {
-//        ANDS_IMM8(s1, s1, 0xff);
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 7);
-//        BFI(xFlags, s3, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//}
+void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5)
+{
+    MOV32w(s5, c&0xff);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRB_U12(s5, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_sbb8);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s3);
+    }
+    EORw_mask(s4, xFlags, 0, 0);            // invert CC because it's reverted for SUB on ARM
+    MRS_nzvc(s3);
+    BFIx(s3, s4, 0, 1); // set C
+    MSR_nzvc(s3);      // load CC into ARM CF
+    IFX(X_AF|X_OF|X_CF) {
+        MVNw_REG(s4, s1);
+    }
+    SBCw_REG(s1, s1, s5);
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_AF|X_OF|X_CF) {
+        ORRw_REG(s3, s4, s5);               // s3 = ~op1 | op2
+        ANDw_REG(s4, s4, s5);               // s4 = ~op1 & op2
+        ANDw_REG(s3, s3, s1);               // s3 = (~op1 | op2) & res
+        ORRw_REG(s3, s3, s4);               // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+        IFX(X_CF) {
+            LSRw(s4, s3, 7);
+            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
+        }
+        IFX(X_AF) {
+            LSRw(s4, s3, 3);
+            BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+        }
+        IFX(X_OF) {
+            LSRw(s4, s3, 6);
+            EORw_REG_LSR(s4, s4, s4, 1);
+            BFIw(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+        }
+    }
+    IFX(X_ZF) {
+        ANDSw_mask(s1, s1, 0, 0b000111);    //mask=000000ff
+        CSETw(s3, cEQ);
+        BFIw(xFlags, s3, F_ZF, 1);
+    }
+    IFX(X_SF) {
+        LSRw(s3, s1, 7);
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 // emit SBB16 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved
 //void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
diff --git a/src/dynarec/dynarec_arm64_emit_tests.c b/src/dynarec/dynarec_arm64_emit_tests.c
index 16fdef1f..a6430847 100755
--- a/src/dynarec/dynarec_arm64_emit_tests.c
+++ b/src/dynarec/dynarec_arm64_emit_tests.c
@@ -203,93 +203,83 @@ void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int
 //    }
 //}
 // emit CMP8 instruction, from cmp s1 , s2, using s3 and s4 as scratch
-//void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
-//{
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s4, d_cmp8);
-//    } else {
-//        SET_DFNONE(s4);
-//    }
-//    SUB_REG_LSL_IMM5(s3, s1, s2, 0);   // res = s1 - s2
-//    IFX(X_PEND) {
-//        STR_IMM9(s3, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_ZF) {
-//        TSTS_IMM8(s3, 0xff);
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s4, s3, 7);
-//        BFI(xFlags, s4, F_SF, 1);
-//    }
-//    // bc = (res & (~d | s)) | (~d & s)
-//    IFX(X_CF|X_AF|X_OF) {
-//        MVN_REG_LSL_IMM5(s4, s1, 0);        // s4 = ~d
-//        ORR_REG_LSL_IMM5(s4, s4, s2, 0);    // s4 = ~d | s
-//        AND_REG_LSL_IMM5(s4, s4, s3, 0);    // s4 = res & (~d | s)
-//        BIC_REG_LSL_IMM5(s3, s2, s1, 0);    // loosing res... s3 = s & ~d
-//        ORR_REG_LSL_IMM5(s3, s4, s3, 0);    // s3 = (res & (~d | s)) | (s & ~d)
-//        IFX(X_CF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 7);
-//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
-//        }
-//        IFX(X_AF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 3);
-//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
-//        }
-//        IFX(X_OF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 6);
-//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
-//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
-//        }
-//    }
-//    IFX(X_PF) {
-//        IFX(X_CF|X_AF|X_OF) {
-//            SUB_REG_LSL_IMM5(s3, s1, s2, 0);
-//        }
-//        AND_IMM8(s3, s3, 0xE0); // lsr 5 masking pre-applied
-//        MOV32(s4, GetParityTab());
-//        LDR_REG_LSR_IMM5(s4, s4, s3, 5-2);   // x/32 and then *4 because array is integer
-//        SUB_REG_LSL_IMM5(s3, s1, s2, 0);
-//        AND_IMM8(s3, s3, 31);
-//        MVN_REG_LSR_REG(s4, s4, s3);
-//        BFI(xFlags, s4, F_PF, 1);
-//    }
-//}
+void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        STRB_U12(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_cmp8);
+    } else {
+        SET_DFNONE(s4);
+    }
+    SUBw_REG(s5, s1, s2);   // res = s1 - s2
+    IFX(X_PEND) {
+        STRB_REG(s5, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_ZF) {
+        ANDSw_mask(s1, s1, 0, 0b000111);    //mask=000000ff
+        CSETw(s3, cEQ);
+        BFIw(xFlags, s3, F_ZF, 1);
+    }
+    IFX(X_SF) {
+        LSRw(s5, s1, 7);
+        BFIw(xFlags, s5, F_SF, 1);
+    }
+    // bc = (res & (~d | s)) | (~d & s)
+    IFX(X_CF|X_AF|X_OF) {
+        MVNw_REG(s4, s1);       // s4 = ~d
+        ORRw_REG(s4, s4, s2);   // s4 = ~d | s
+        ANDw_REG(s4, s4, s5);   // s4 = res & (~d | s)
+        BICw_REG(s3, s2, s5);   // s3 = s & ~d
+        ORRw_REG(s3, s4, s3);   // s3 = (res & (~d | s)) | (s & ~d)
+        IFX(X_CF) {
+            LSRw(s4, s3, 7);
+            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
+        }
+        IFX(X_AF) {
+            LSRw(s4, s3, 3);
+            BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+        }
+        IFX(X_OF) {
+            LSRw(s4, s3, 6);
+            EORw_REG_LSR(s4, s4, s4, 1);
+            BFIw(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+        }
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s5, s3, s4);
+    }
+}
 // emit CMP8 instruction, from cmp s1 , 0, using s3 and s4 as scratch
-//void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
-//{
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        MOVW(s4, 0);
-//        STR_IMM9(s4, xEmu, offsetof(x64emu_t, op2));
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//        SET_DF(s3, d_cmp8);
-//    } else {
-//        SET_DFNONE(s4);
-//    }
-//    // bc = (res & (~d | s)) | (~d & s) = 0
-//    IFX(X_CF | X_AF | X_ZF) {
-//        BIC_IMM8(xFlags, xFlags, (1<<F_CF)|(1<<F_AF)|(1<<F_ZF), 0);
-//    }
-//    IFX(X_OF) {
-//        BFC(xFlags, F_OF, 1);
-//    }
-//    IFX(X_ZF) {
-//        TSTS_IMM8(s1, 0xff);
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s4, s1, 7);
-//        BFI(xFlags, s4, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//}
+void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
+{
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
+        MOV32w(s4, 0);
+        STRB_U12(s4, xEmu, offsetof(x64emu_t, op2));
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+        SET_DF(s3, d_cmp8);
+    } else {
+        SET_DFNONE(s4);
+    }
+    // bc = (res & (~d | s)) | (~d & s) = 0
+    IFX(X_CF | X_AF | X_ZF | X_OF) {
+        MOV32w(s3, (1<<F_ZF)|(1<<F_CF)|(1<<F_AF)|(1<<F_OF));
+        BICw(xFlags, xFlags, s3);
+    }
+    IFX(X_ZF) {
+        ANDSw_mask(s1, s1, 0, 0b000111);    //mask=000000ff
+        CSETw(s3, cEQ);
+        BFIw(xFlags, s3, F_ZF, 1);
+    }
+    IFX(X_SF) {
+        LSRw(s3, s1, 7);
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 // emit TEST32 instruction, from test s1 , s2, using s3 and s4 as scratch
 void emit_test32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index 408b7e40..69f48f1a 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -142,18 +142,23 @@
 // Write back gd in correct register
 #define GWBACK       BFI((xEAX+((nextop&0x38)>>3)+(rex.r<<3)), gd, 0, 16);
 //GETEB will use i for ed, and can use r3 for wback.
-#define GETEB(i) if((nextop&0xC0)==0xC0) {  \
-                    wback = (nextop&7);     \
-                    wb2 = (wback>>2);       \
-                    wback = xEAX+(wback&3); \
-                    UXTB(i, wback, wb2);    \
-                    wb1 = 0;                \
-                    ed = i;                 \
-                } else {                    \
-                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 4095, 0); \
-                    LDRB_IMM9(i, wback, fixedaddress); \
-                    wb1 = 1;                \
-                    ed = i;                 \
+#define GETEB(i, D) if(MODREG) {                \
+                    if(rex.rex) {               \
+                        wback = xRAX+(nextop&7)+(rex.r<<3);     \
+                        wb2 = 0;                \
+                    } else {                    \
+                        wback = (nextop&7);     \
+                        wb2 = (wback>>2)*8;     \
+                        wback = xRAX+(wback&3); \
+                    }                           \
+                    UBFMw(i, wback, wb2, 7);    \
+                    wb1 = 0;                    \
+                    ed = i;                     \
+                } else {                        \
+                    addr = geted(dyn, addr, ninst, nextop, &wback, x3, &fixedaddress, 0xfff, 0, rex, 0, D); \
+                    LDRB_U12(i, wback, fixedaddress); \
+                    wb1 = 1;                    \
+                    ed = i;                     \
                 }
 //GETEBO will use i for ed, i is also Offset, and can use r3 for wback.
 #define GETEBO(i) if((nextop&0xC0)==0xC0) {  \
@@ -185,11 +190,11 @@
                     ed = i;                 \
                 }
 // Write eb (ed) back to original register / memory
-#define EBBACK   if(wb1) {STRB_IMM9(ed, wback, fixedaddress);} else {BFI(wback, ed, wb2*8, 8);}
+#define EBBACK   if(wb1) {STRB_U12(ed, wback, fixedaddress);} else {BFIx(wback, ed, wb2, 8);}
 //GETGB will use i for gd
 #define GETGB(i)    gd = (nextop&0x38)>>3;  \
                     gb2 = ((gd&4)>>2);      \
-                    gb1 = xEAX+(gd&3);      \
+                    gb1 = xRAX+(gd&3);      \
                     gd = i;                 \
                     nopenope!               \
                     UXTB(gd, gb1, gb2);
@@ -616,10 +621,10 @@ void iret_to_epilog(dynarec_arm_t* dyn, int ninst);
 void call_c(dynarec_arm_t* dyn, int ninst, void* fnc, int reg, int ret, int saveflags, int save_reg);
 //void grab_fsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg);
 //void grab_tlsdata(dynarec_arm_t* dyn, uintptr_t addr, int ninst, int reg);
-//void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 //void emit_cmp16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_cmp32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
-//void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
+void emit_cmp8_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
 //void emit_cmp16_0(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_cmp32_0(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
 //void emit_test8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
@@ -628,11 +633,11 @@ void emit_test32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_add32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_add32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5);
 //void emit_add8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
-//void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5);
 void emit_sub32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_sub32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4, int s5);
 //void emit_sub8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
-//void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+void emit_sub8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5);
 void emit_or32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_or32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 void emit_xor32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
@@ -640,11 +645,11 @@ void emit_xor32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
 void emit_and32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_and32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 //void emit_or8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
-//void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+void emit_or8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_xor8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
-//void emit_xor8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+void emit_xor8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_and8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
-//void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+void emit_and8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_add16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
 //void emit_add16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_sub16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
@@ -664,13 +669,13 @@ void emit_dec32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4
 //void emit_adc32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 //void emit_adc32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
-//void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5);
 //void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
 //void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_sbb32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 //void emit_sbb32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
-//void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
+void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5);
 //void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
 //void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 //void emit_neg32(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);