about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2021-03-20 18:13:18 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2021-03-20 18:13:18 +0100
commit e4fa7bc969571ac154e7c8a7027b24e6e8946a0c (patch)
tree a356cb9b81c5f964b8fe47f8f80590dff7444c63 /src
parent 98b0ac713d8f0558713a801c7219a22bf3af74e2 (diff)
download box64-e4fa7bc969571ac154e7c8a7027b24e6e8946a0c.tar.gz
box64-e4fa7bc969571ac154e7c8a7027b24e6e8946a0c.zip
[DYNAREC] Added (66) 18..1D SBB opcodes
Diffstat (limited to 'src')
-rwxr-xr-x src/dynarec/arm64_printer.c 69
-rwxr-xr-x src/dynarec/dynarec_arm64_00.c 57
-rwxr-xr-x src/dynarec/dynarec_arm64_66.c 31
-rwxr-xr-x src/dynarec/dynarec_arm64_emit_math.c 316
-rwxr-xr-x src/dynarec/dynarec_arm64_helper.h 6
5 files changed, 315 insertions, 164 deletions
diff --git a/src/dynarec/arm64_printer.c b/src/dynarec/arm64_printer.c
index 57024b6b..cb86ce42 100755
--- a/src/dynarec/arm64_printer.c
+++ b/src/dynarec/arm64_printer.c
@@ -16,9 +16,9 @@ static const char* conds[] = {"cEQ", "cNE", "cCS", "cCC", "cMI", "cPL", "cVS", "
 #define abs(A) (((A)<0)?(-(A)):(A))

 

 typedef struct arm64_print_s {

-    int N, S, U;

+    int N, S, U, L;   // L: new field, filled by isMask from mask letter L

     int t, n, m, d, t2, a;

-    int f, c, o, h;

+    int f, c, o, h, p;   // p: new field, filled by isMask from mask letter p; the MRS/MSR decoder compares it against op1

     int i, r, s;

     int x, w;

 } arm64_print_t;

@@ -60,9 +60,11 @@ int isMask(uint32_t opcode, const char* mask, arm64_print_t *a)
             case 'N': a->N = (a->N<<1) | v; break;

             case 'S': a->S = (a->S<<1) | v; break;

             case 'U': a->U = (a->U<<1) | v; break;

+            case 'L': a->L = (a->L<<1) | v; break;

             case 't': a->t = (a->t<<1) | v; break;

             case '2': a->t2 = (a->t2<<1) | v; break;

             case 'n': a->n = (a->n<<1) | v; break;

+            case 'p': a->p = (a->p<<1) | v; break;

             case 'm': a->m = (a->m<<1) | v; break;

             case 'a': a->a = (a->a<<1) | v; break;

             case 'd': a->d = (a->d<<1) | v; break;

@@ -374,6 +376,36 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
             snprintf(buff, sizeof(buff), "SUBS %s, %s, %s %s %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], shifts[shift], imm);

         return buff;

     }

+    if(isMask(opcode, "f0011010000mmmmm000000nnnnnddddd", &a)) {

+        snprintf(buff, sizeof(buff), "ADC %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]);

+        return buff;

+    }

+    if(isMask(opcode, "f0111010000mmmmm000000nnnnnddddd", &a)) {

+        snprintf(buff, sizeof(buff), "ADCS %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]);

+        return buff;

+    }

+    if(isMask(opcode, "f1011010000mmmmm000000nnnnnddddd", &a)) {

+        if(Rn==31)

+            snprintf(buff, sizeof(buff), "NGC %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rm]:Wt[Rm]);

+        else

+            snprintf(buff, sizeof(buff), "SBC %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]);

+        return buff;

+    }

+    if(isMask(opcode, "f1111010000mmmmm000000nnnnnddddd", &a)) {

+        if(Rn==31)

+            snprintf(buff, sizeof(buff), "NGCS %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rm]:Wt[Rm]);

+        else

+            snprintf(buff, sizeof(buff), "SBCS %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]);

+        return buff;

+    }

+    if(isMask(opcode, "f1101011hh0mmmmmiiiiiinnnnnddddd", &a)) {   // NOTE(review): body is identical to the SUBS decoder whose tail is visible just above this hunk; if that decoder uses this same mask, this copy can never be reached - confirm and drop one of them

+        const char* shifts[] = { "LSL", "LSR", "ASR", "???"};

+        if(shift==0 && imm==0)

+            snprintf(buff, sizeof(buff), "SUBS %s, %s, %s", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm]);

+        else 

+            snprintf(buff, sizeof(buff), "SUBS %s, %s, %s %s %d", sf?Xt[Rd]:Wt[Rd], sf?Xt[Rn]:Wt[Rn], sf?Xt[Rm]:Wt[Rm], shifts[shift], imm);

+        return buff;

+    }

     // ---- LOGIC

     if(isMask(opcode, "f11100100Nrrrrrrssssssnnnnnddddd", &a)) {

         uint64_t i = DecodeBitMasks(a.N, imms, immr);

@@ -613,6 +645,39 @@ const char* arm64_print(uint32_t opcode, uintptr_t addr)
         return buff;

     }

 

+    // MRS / MSR

+    if(isMask(opcode, "110101010001opppnnnnmmmm222ttttt", &a)) {

+        const char* reg=NULL;

+        //o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0 => nzcv

+        //o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=2 => fpcr

+        if(a.o==1 && a.p==3 && a.n==4 && a.m==2 && a.t2==0)

+            reg="nzcv";

+        else if(a.o==1 && a.p==3 && a.n==4 && a.m==4 && a.t2==2)

+            reg="fpcr";

+

+        if(!reg)

+            snprintf(buff, sizeof(buff), "MSR S%d_%d_%d_%d_%d, %s", 2+a.o, a.p, a.n, a.m, a.t2, Xt[Rt]);

+        else

+            snprintf(buff, sizeof(buff), "MSR %s, %s", reg, Xt[Rt]);

+        return buff;

+    }

+    if(isMask(opcode, "110101010011opppnnnnmmmm222ttttt", &a)) {

+        const char* reg=NULL;

+        //o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0010(2) op2=0 => nzcv

+        //o0=1(op0=3), op1=0b011(3) CRn=0b0100(4) CRm=0b0100(4) op2=2 => fpcr

+        if(a.o==1 && a.p==3 && a.n==4 && a.m==2 && a.t2==0)

+            reg="nzcv";

+        else if(a.o==1 && a.p==3 && a.n==4 && a.m==4 && a.t2==2)

+            reg="fpcr";

+

+        if(!reg)

+            snprintf(buff, sizeof(buff), "MRS %s, S%d_%d_%d_%d_%d", Xt[Rt], 2+a.o, a.p, a.n, a.m, a.t2);

+        else

+            snprintf(buff, sizeof(buff), "MRS %s, %s", Xt[Rt], reg);

+        return buff;

+    }

+

+

     snprintf(buff, sizeof(buff), "%08X ???", __builtin_bswap32(opcode));

     return buff;

 }
\ No newline at end of file
diff --git a/src/dynarec/dynarec_arm64_00.c b/src/dynarec/dynarec_arm64_00.c
index b63efd2f..7522acfa 100755
--- a/src/dynarec/dynarec_arm64_00.c
+++ b/src/dynarec/dynarec_arm64_00.c
@@ -218,6 +218,63 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             emit_adc32(dyn, ninst, rex, xRAX, x1, x3, x4);
             break;
 
+        case 0x18:
+            INST_NAME("SBB Eb, Gb");
+            READFLAGS(X_CF);
+            SETFLAGS(X_ALL, SF_SET);
+            nextop = F8;
+            GETEB(x1, 0);
+            GETGB(x2);
+            emit_sbb8(dyn, ninst, x1, x2, x4, x5);
+            EBBACK;
+            break;
+        case 0x19:
+            INST_NAME("SBB Ed, Gd");
+            READFLAGS(X_CF);
+            SETFLAGS(X_ALL, SF_SET);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_sbb32(dyn, ninst, rex, ed, gd, x3, x4);
+            WBACK;
+            break;
+        case 0x1A:
+            INST_NAME("SBB Gb, Eb");
+            READFLAGS(X_CF);
+            SETFLAGS(X_ALL, SF_SET);
+            nextop = F8;
+            GETEB(x2, 0);
+            GETGB(x1);
+            emit_sbb8(dyn, ninst, x1, x2, x3, x4);
+            GBBACK;
+            break;
+        case 0x1B:
+            INST_NAME("SBB Gd, Ed");
+            READFLAGS(X_CF);
+            SETFLAGS(X_ALL, SF_SET);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_sbb32(dyn, ninst, rex, gd, ed, x3, x4);
+            break;
+        case 0x1C:
+            INST_NAME("SBB AL, Ib");
+            READFLAGS(X_CF);
+            SETFLAGS(X_ALL, SF_SET);
+            u8 = F8;
+            UXTBw(x1, xRAX);
+            emit_sbb8c(dyn, ninst, x1, u8, x3, x4, x5);
+            BFIx(xRAX, x1, 0, 8);
+            break;
+        case 0x1D:
+            INST_NAME("SBB EAX, Id");
+            READFLAGS(X_CF);
+            SETFLAGS(X_ALL, SF_SET);
+            i64 = F32S;
+            MOV64xw(x2, i64);
+            emit_sbb32(dyn, ninst, rex, xRAX, x2, x3, x4);
+            break;
+
         case 0x21:
             INST_NAME("AND Ed, Gd");
             SETFLAGS(X_ALL, SF_SET);
diff --git a/src/dynarec/dynarec_arm64_66.c b/src/dynarec/dynarec_arm64_66.c
index d0e653ed..f17e76b8 100755
--- a/src/dynarec/dynarec_arm64_66.c
+++ b/src/dynarec/dynarec_arm64_66.c
@@ -143,6 +143,37 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             BFIx(xRAX, x1, 0, 16);

             break;

 

+        case 0x19:

+            INST_NAME("SBB Ew, Gw");

+            READFLAGS(X_CF);

+            SETFLAGS(X_ALL, SF_SET);

+            nextop = F8;

+            GETGW(x2);

+            GETEW(x1, 0);

+            emit_sbb16(dyn, ninst, x1, x2, x4, x5);

+            EWBACK;

+            break;

+        case 0x1B:

+            INST_NAME("SBB Gw, Ew");

+            READFLAGS(X_CF);

+            SETFLAGS(X_ALL, SF_SET);

+            nextop = F8;

+            GETGW(x1);

+            GETEW(x2, 0);

+            emit_sbb16(dyn, ninst, x1, x2, x4, x3);

+            GWBACK;

+            break;

+        case 0x1D:

+            INST_NAME("SBB AX, Iw");

+            READFLAGS(X_CF);

+            SETFLAGS(X_ALL, SF_SET);

+            i16 = F16;

+            UXTHw(x1, xRAX);

+            MOV32w(x2, i16);

+            emit_sbb16(dyn, ninst, x1, x2, x3, x4);

+            BFIx(xRAX, x1, 0, 16);

+            break;

+

         case 0xD1:

         case 0xD3:

             nextop = F8;

diff --git a/src/dynarec/dynarec_arm64_emit_math.c b/src/dynarec/dynarec_arm64_emit_math.c
index 151847f3..7ecef218 100755
--- a/src/dynarec/dynarec_arm64_emit_math.c
+++ b/src/dynarec/dynarec_arm64_emit_math.c
@@ -1365,58 +1365,59 @@ void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 //}
 
 // emit SBB32 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch
-//void emit_sbb32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
-//{
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s3, d_sbb32);
-//    } else IFX(X_ALL) {
-//        SET_DFNONE(s3);
-//    }
-//    IFX(X_AF) {
-//        MVN_REG_LSL_IMM5(s4, s1, 0);
-//    }
-//    XOR_IMM8(s3, xFlags, 1);                // invert CC because it's reverted for SUB on ARM
-//    MOVS_REG_LSR_IMM5(s3, s3, 1);       // load into ARM CF
-//    IFX(X_ZF|X_CF|X_OF) {
-//        SBCS_REG_LSL_IMM5(s1, s1, s2, 0);
-//    } else {
-//        SBC_REG_LSL_IMM5(s1, s1, s2, 0);
-//    }
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_AF) {
-//        ORR_REG_LSL_IMM5(s3, s4, s2, 0);    // s3 = ~op1 | op2
-//        AND_REG_LSL_IMM5(s4, s2, s4, 0);    // s4 = ~op1 & op2
-//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
-//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s4 = (~op1 & op2) | ((~op1 | op2) & res)
-//        UBFX(s4, s3, 3, 1);
-//        BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
-//    }
-//    IFX(X_ZF|X_CF) {
-//        BIC_IMM8(xFlags, xFlags, (1<<F_ZF)|(1<<F_CF), 0);
-//    }
-//    IFX(X_ZF) {
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_CF) {
-//        // Inverted carry
-//        ORR_IMM8_COND(cCC, xFlags, xFlags, 1<<F_CF, 0);
-//    }
-//    IFX(X_OF) {
-//        ORR_IMM8_COND(cVS, xFlags, xFlags, 0b10, 0x0b);
-//        BIC_IMM8_COND(cVC, xFlags, xFlags, 0b10, 0x0b);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 31);
-//        BFI(xFlags, s3, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//}
+void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)   // SBB of s2 from s1, result left in s1; s3 and s4 are scratch and get overwritten; rex.w selects the 64-bit form
+{
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, op1));   // pending-flags path: record both operands in the emu structure
+        STRxw_U12(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, rex.w?d_sbb64:d_sbb32);   // deferred-flag kind follows the operand width
+    } else IFX(X_ALL) {
+        SET_DFNONE(s3);
+    }
+    EORw_mask(s4, xFlags, 0, 0);        // invert CC because it's reverted for SUB on ARM (result kept in s4, xFlags itself is not modified)
+    MRS_nzvc(s3);
+    BFIx(s3, s4, 29, 1); // set C: bit 0 of s4 is inserted at bit 29 of the value read by MRS
+    MSR_nzvc(s3);      // load CC into ARM CF
+    IFX(X_AF) {
+        MVNxw_REG(s4, s1);   // s4 = ~op1, taken before s1 is overwritten by the subtraction
+    }
+    IFX(X_ZF|X_CF|X_OF) {
+        SBCSxw_REG(s1, s1, s2);   // flag-setting form: ZF/CF/OF below are read from the ARM condition codes
+    } else {
+        SBCxw_REG(s1, s1, s2);
+    }
+    IFX(X_PEND) {
+        STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_AF) {
+        ORRxw_REG(s3, s4, s2);    // s3 = ~op1 | op2
+        ANDxw_REG(s4, s2, s4);    // s4 = ~op1 & op2
+        ANDxw_REG(s3, s3, s1);   // s3 = (~op1 | op2) & res
+        ORRxw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res), i.e. the bc value used below
+        UBFXw(s4, s3, 3, 1);
+        BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+    }
+    IFX(X_ZF) {
+        CSETw(s3, cEQ);
+        BFIw(xFlags, s3, F_ZF, 1);
+    }
+    IFX(X_CF) {
+        // Inverted carry
+        CSETw(s3, cCC);
+        BFIw(xFlags, s3, F_CF, 1);
+    }
+    IFX(X_OF) {
+        CSETw(s3, cVS);
+        BFIw(xFlags, s3, F_OF, 1);
+    }
+    IFX(X_SF) {
+        LSRxw(s3, s1, rex.w?63:31);   // sign bit of the result: bit 63 when rex.w is set, bit 31 otherwise
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 // emit SBB32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 //void emit_sbb32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
@@ -1491,59 +1492,58 @@ void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
 //}
 
 // emit SBB8 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved
-//void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
-//{
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s3, d_sbb8);
-//    } else IFX(X_ALL) {
-//        SET_DFNONE(s3);
-//    }
-//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {PUSH(xSP, 1<<s4);}}
-//    IFX(X_AF|X_OF|X_CF) {
-//        MVN_REG_LSL_IMM5(s4, s1, 0);
-//    }
-//
-//    XOR_IMM8(s3, xFlags, 1);            // invert CC because it's reverted for SUB on ARM
-//    MOVS_REG_LSR_IMM5(s3, s3, 1);       // load into ARM CF
-//    SBC_REG_LSL_IMM5(s1, s1, s2, 0);
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_AF|X_OF|X_CF) {
-//        ORR_REG_LSL_IMM5(s3, s4, s2, 0);    // s3 = ~op1 | op2
-//        AND_REG_LSL_IMM5(s4, s2, s4, 0);    // s4 = ~op1 & op2
-//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
-//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
-//        IFX(X_CF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 7);
-//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
-//        }
-//        IFX(X_AF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 3);
-//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
-//        }
-//        IFX(X_OF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 6);
-//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
-//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
-//        }
-//    }
-//    IFX(X_ZF) {
-//        ANDS_IMM8(s1, s1, 0xff);
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 7);
-//        BFI(xFlags, s3, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {POP(xSP, 1<<s4);}}
-//}
+void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)   // 8-bit SBB of s2 from s1, result left in s1; s3 and s4 are scratch and get overwritten
+{
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));   // pending-flags path: record both operands in the emu structure
+        STRB_U12(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_sbb8);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s3);
+    }
+    EORw_mask(s4, xFlags, 0, 0);        // invert CC because it's reverted for SUB on ARM (result kept in s4, xFlags itself is not modified)
+    MRS_nzvc(s3);
+    BFIx(s3, s4, 29, 1); // set C: bit 0 of s4 is inserted at bit 29 of the value read by MRS
+    MSR_nzvc(s3);      // load CC into ARM CF
+    IFX(X_AF|X_OF|X_CF) {
+        MVNw_REG(s4, s1);   // s4 = ~op1, taken before s1 is overwritten by the subtraction
+    }
+    SBCw_REG(s1, s1, s2);   // non flag-setting form: CF/AF/OF are rebuilt from bc below and ZF from the masked result
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_AF|X_OF|X_CF) {
+        ORRw_REG(s3, s4, s2);    // s3 = ~op1 | op2
+        ANDw_REG(s4, s2, s4);    // s4 = ~op1 & op2
+        ANDw_REG(s3, s3, s1);   // s3 = (~op1 | op2) & res
+        ORRw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res), i.e. the bc value used below
+        IFX(X_CF) {
+            LSRw(s4, s3, 7);
+            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
+        }
+        IFX(X_AF) {
+            LSRw(s4, s3, 3);
+            BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+        }
+        IFX(X_OF) {
+            LSRw(s4, s3, 6);
+            EORw_REG_LSR(s4, s4, s4, 1);
+            BFIw(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+        }
+    }
+    IFX(X_ZF) {
+        ANDSw_mask(s1, s1, 0, 7);   //mask=0xff; note s1 is truncated to 8 bits only on this ZF path
+        CSETw(s3, cEQ);
+        BFIw(xFlags, s3, F_ZF, 1);
+    }
+    IFX(X_SF) {
+        LSRw(s3, s1, 7);   // bit 7 of the result lands in bit 0; BFIw copies a single bit
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 // emit SBB8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, int s5)
@@ -1601,60 +1601,58 @@ void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4, in
 }
 
 // emit SBB16 instruction, from s1 , s2, store result in s1 using s3 and s4 as scratch, with save_s4 is s4 need to be saved
-//void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4)
-//{
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, op1));
-//        STR_IMM9(s2, xEmu, offsetof(x64emu_t, op2));
-//        SET_DF(s3, d_sbb16);
-//    } else IFX(X_ALL) {
-//        SET_DFNONE(s3);
-//    }
-//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {PUSH(xSP, 1<<s4);}}
-//    IFX(X_AF|X_OF|X_CF) {
-//        MVN_REG_LSL_IMM5(s4, s1, 0);
-//    }
-//
-//    XOR_IMM8(s3, xFlags, 1);            // invert CC because it's reverted for SUB on ARM
-//    MOVS_REG_LSR_IMM5(s3, s3, 1);       // load into ARM CF
-//    SBC_REG_LSL_IMM5(s1, s1, s2, 0);
-//    IFX(X_PEND) {
-//        STR_IMM9(s1, xEmu, offsetof(x64emu_t, res));
-//    }
-//    IFX(X_AF|X_OF|X_CF) {
-//        ORR_REG_LSL_IMM5(s3, s4, s2, 0);    // s3 = ~op1 | op2
-//        AND_REG_LSL_IMM5(s4, s2, s4, 0);    // s4 = ~op1 & op2
-//        AND_REG_LSL_IMM5(s3, s3, s1, 0);   // s3 = (~op1 | op2) & res
-//        ORR_REG_LSL_IMM5(s3, s3, s4, 0);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
-//        IFX(X_CF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 15);
-//            BFI(xFlags, s4, F_CF, 1);    // CF : bc & 0x8000
-//        }
-//        IFX(X_AF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 3);
-//            BFI(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
-//        }
-//        IFX(X_OF) {
-//            MOV_REG_LSR_IMM5(s4, s3, 14);
-//            XOR_REG_LSR_IMM8(s4, s4, s4, 1);
-//            BFI(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
-//        }
-//    }
-//    IFX(X_ZF) {
-//        UXTH(s1, s1, 0);
-//        TSTS_REG_LSL_IMM5(s1, s1, 0);
-//        ORR_IMM8_COND(cEQ, xFlags, xFlags, 1<<F_ZF, 0);
-//        BIC_IMM8_COND(cNE, xFlags, xFlags, 1<<F_ZF, 0);
-//    }
-//    IFX(X_SF) {
-//        MOV_REG_LSR_IMM5(s3, s1, 15);
-//        BFI(xFlags, s3, F_SF, 1);
-//    }
-//    IFX(X_PF) {
-//        emit_pf(dyn, ninst, s1, s3, s4);
-//    }
-//    IFX(X_AF|X_OF|X_CF|X_PF){if(save_s4) {POP(xSP, 1<<s4);}}
-//}
+void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)   // 16-bit SBB of s2 from s1, result left in s1; s3 and s4 are scratch and get overwritten
+{
+    IFX(X_PEND) {
+        STRH_U12(s1, xEmu, offsetof(x64emu_t, op1));   // pending-flags path: record both operands in the emu structure
+        STRH_U12(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, d_sbb16);
+    } else IFX(X_ALL) {
+        SET_DFNONE(s3);
+    }
+    EORw_mask(s4, xFlags, 0, 0);            // invert CC because it's reverted for SUB on ARM (result kept in s4, xFlags itself is not modified)
+    MRS_nzvc(s3);
+    BFIx(s3, s4, 29, 1); // set C, bit 29
+    MSR_nzvc(s3);      // load CC into ARM CF
+    IFX(X_AF|X_OF|X_CF) {
+        MVNw_REG(s4, s1);   // s4 = ~op1, taken before s1 is overwritten by the subtraction
+    }
+    SBCw_REG(s1, s1, s2);   // non flag-setting form: CF/AF/OF are rebuilt from bc below and ZF from the masked result
+    IFX(X_PEND) {
+        STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_AF|X_OF|X_CF) {
+        ORRw_REG(s3, s4, s2);    // s3 = ~op1 | op2
+        ANDw_REG(s4, s2, s4);    // s4 = ~op1 & op2
+        ANDw_REG(s3, s3, s1);   // s3 = (~op1 | op2) & res
+        ORRw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res), i.e. the bc value used below
+        IFX(X_CF) {
+            LSRw(s4, s3, 15);
+            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x8000
+        }
+        IFX(X_AF) {
+            LSRw(s4, s3, 3);
+            BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+        }
+        IFX(X_OF) {
+            LSRw(s4, s3, 14);
+            EORw_REG_LSR(s4, s4, s4, 1);
+            BFIw(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+        }
+    }
+    IFX(X_ZF) {
+        ANDSw_mask(s1, s1, 0, 15);  //mask=0xffff; note s1 is truncated to 16 bits only on this ZF path
+        CSETw(s3, cEQ);
+        BFIw(xFlags, s3, F_ZF, 1);
+    }
+    IFX(X_SF) {
+        LSRw(s3, s1, 15);   // bit 15 of the result lands in bit 0; BFIw copies a single bit
+        BFIw(xFlags, s3, F_SF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
 
 // emit SBB16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 //void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
diff --git a/src/dynarec/dynarec_arm64_helper.h b/src/dynarec/dynarec_arm64_helper.h
index 8838ce27..ab80c286 100755
--- a/src/dynarec/dynarec_arm64_helper.h
+++ b/src/dynarec/dynarec_arm64_helper.h
@@ -682,11 +682,11 @@ void emit_adc8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_adc8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5);
 void emit_adc16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 //void emit_adc16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
-//void emit_sbb32(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+void emit_sbb32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 //void emit_sbb32c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
-//void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
+void emit_sbb8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_sbb8c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4, int s5);
-//void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, int save_s4);
+void emit_sbb16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 //void emit_sbb16c(dynarec_arm_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 void emit_neg32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
 void emit_neg16(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4);