about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2023-11-10 17:24:17 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2023-11-10 17:24:17 +0100
commit b82d9d15d4040c568734b5cba949975c01315955 (patch)
tree f55b81b4aa1a887e3b9a694033bef392c3a85458 /src/dynarec
parent 3bc0495eb259302bfbe445c29a61531b11393a68 (diff)
download box64-b82d9d15d4040c568734b5cba949975c01315955.tar.gz
box64-b82d9d15d4040c568734b5cba949975c01315955.zip
[ARM64_DYNAREC] More fixing and improving on shifting opcodes
Diffstat (limited to 'src/dynarec')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_00.c 206
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 45
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_64.c 6
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_66.c 22
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_660f.c 22
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_67.c 4
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_emit_math.c 7
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_emit_shift.c 177
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_helper.h 20
-rw-r--r-- src/dynarec/dynarec_native_functions.c 6
-rw-r--r-- src/dynarec/dynarec_native_functions.h 2
11 files changed, 391 insertions, 126 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 2233c27f..ded12737 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -769,9 +769,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SMULL(gd, ed, x4);
                     UFLAG_RES(gd);
                     LSRx(x3, gd, 32);
+                    MOVw_REG(gd, gd);
                     UFLAG_OP1(x3);
                     UFLAG_DF(x3, d_imul32);
-                    MOVw_REG(gd, gd);
                 } else {
                     MULxw(gd, ed, x4);
                 }
@@ -1733,22 +1733,22 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             switch((nextop>>3)&7) {
                 case 0:
                     INST_NAME("ROL Eb, Ib");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    if(geted_ib(dyn, addr, ninst, nextop)&0x1f) {
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    }
                     GETEB(x1, 1);
                     u8 = F8;
-                    MOV32w(x2, u8);
-                    CALL_(rol8, ed, x3);
+                    emit_rol8c(dyn, ninst, x1, u8&7, x4, x5);
                     EBBACK;
                     break;
                 case 1:
                     INST_NAME("ROR Eb, Ib");
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    SETFLAGS(X_OF|X_CF, SF_SET);
+                    if(geted_ib(dyn, addr, ninst, nextop)&0x1f) {
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    }
                     GETEB(x1, 1);
                     u8 = F8;
-                    MOV32w(x2, u8);
-                    CALL_(ror8, ed, x3);
+                    emit_ror8c(dyn, ninst, x1, u8&7, x4, x5);
                     EBBACK;
                     break;
                 case 2:
@@ -1839,7 +1839,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             switch((nextop>>3)&7) {
                 case 0:
                     INST_NAME("ROL Ed, Ib");
-                    SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    if(geted_ib(dyn, addr, ninst, nextop)&(0x1f+(rex.w*0x20))) {
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    }
                     GETED(1);
                     u8 = (F8)&(rex.w?0x3f:0x1f);
                     emit_rol32c(dyn, ninst, rex, ed, u8, x3, x4);
@@ -1847,7 +1849,9 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;
                 case 1:
                     INST_NAME("ROR Ed, Ib");
-                    SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    if(geted_ib(dyn, addr, ninst, nextop)&(0x1f+(rex.w*0x20))) {
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);
+                    }
                     GETED(1);
                     u8 = (F8)&(rex.w?0x3f:0x1f);
                     emit_ror32c(dyn, ninst, rex, ed, u8, x3, x4);
@@ -2099,30 +2103,71 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 0:
                     if(opcode==0xD0) {
                         INST_NAME("ROL Eb, 1");
-                        MOV32w(x2, 1);
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                        GETEB(x1, 0);
+                        emit_rol8c(dyn, ninst, ed, 1, x4, x5);
+                        EBBACK;
                     } else {
                         INST_NAME("ROL Eb, CL");
-                        ANDSw_mask(x2, xRCX, 0, 0b00100);
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                        UFLAG_IF {
+                            TSTw_mask(xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
+                        ANDw_mask(x2, xRCX, 0, 0b00010);  //mask=0x000000007
+                        MOV32w(x4, 8);
+                        SUBx_REG(x2, x4, x2);
+                        GETEB(x1, 0);
+                        UFLAG_IF {
+                            B_NEXT(cEQ);
+                        }
+                        ORRw_REG_LSL(ed, ed, ed, 8);
+                        LSRw_REG(ed, ed, x2);
+                        EBBACK;
+                        UFLAG_IF {  // calculate flags directly
+                            CMPSw_U12(x2, 7);
+                            B_MARK(cNE);
+                                LSRxw(x3, ed, 7);
+                                ADDxw_REG(x3, x3, ed);
+                                BFIw(xFlags, x3, F_OF, 1);
+                            MARK;
+                            BFIw(xFlags, ed, F_CF, 1);
+                            UFLAG_DF(x2, d_none);
+                        }
                     }
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    SETFLAGS(X_OF|X_CF, SF_SET);
-                    GETEB(x1, 0);
-                    CALL_(rol8, x1, x3);
-                    EBBACK;
                     break;
                 case 1:
                     if(opcode==0xD0) {
                         INST_NAME("ROR Eb, 1");
                         MOV32w(x2, 1);
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                        GETEB(x1, 0);
+                        emit_ror8c(dyn, ninst, ed, 1, x4, x5);
+                        EBBACK;
                     } else {
                         INST_NAME("ROR Eb, CL");
-                        ANDSw_mask(x2, xRCX, 0, 0b00100);
+                        SETFLAGS(X_OF|X_CF, SF_SUBSET);
+                        UFLAG_IF {
+                            TSTw_mask(xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
+                        ANDw_mask(x2, xRCX, 0, 0b00010);  //mask=0x000000007
+                        GETEB(x1, 0);
+                        UFLAG_IF {
+                            B_NEXT(cEQ);
+                        }
+                        ORRw_REG_LSL(ed, ed, ed, 8);
+                        LSRw_REG(ed, ed, x2);
+                        EBBACK;
+                        UFLAG_IF {  // calculate flags directly
+                            CMPSw_U12(x2, 1);
+                            B_MARK(cNE);
+                                LSRxw(x2, ed, 6); // x2 = d>>30
+                                EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>30) ^ ((d>>30)>>1))
+                                BFIw(xFlags, x2, F_OF, 1);
+                            MARK;
+                            BFXILw(xFlags, ed, 7, 1);
+                            UFLAG_DF(x2, d_none);
+                        }
                     }
-                    MESSAGE(LOG_DUMP, "Need Optimization\n");
-                    SETFLAGS(X_OF|X_CF, SF_SET);
-                    GETEB(x1, 0);
-                    CALL_(ror8, x1, x3);
-                    EBBACK;
                     break;
                 case 2:
                     if(opcode==0xD0) {INST_NAME("RCL Eb, 1");} else {INST_NAME("RCL Eb, CL");}
@@ -2262,16 +2307,26 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 0:
                     INST_NAME("ROL Ed, CL");
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
-                    if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                    UFLAG_IF {
+                        if(rex.w) {
+                            ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        } else {
+                            ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        if(rex.w) {
+                            ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        } else {
+                            ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
                     }
                     MOV64xw(x4, (rex.w?64:32));
                     SUBx_REG(x3, x4, x3);
                     GETED(0);
-                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    UFLAG_IF {
+                        if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
+                        B_NEXT(cEQ);
+                    }
                     RORxw_REG(ed, ed, x3);
                     WBACK;
                     UFLAG_IF {  // calculate flags directly
@@ -2288,14 +2343,24 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 1:
                     INST_NAME("ROR Ed, CL");
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
-                    if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                    UFLAG_IF {
+                        if(rex.w) {
+                            ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        } else {
+                            ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        if(rex.w) {
+                            ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        } else {
+                            ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
                     }
                     GETED(0);
-                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    UFLAG_IF {
+                        if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
+                        B_NEXT(cEQ);
+                    }
                     RORxw_REG(ed, ed, x3);
                     WBACK;
                     UFLAG_IF {  // calculate flags directly
@@ -2305,8 +2370,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                             EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>30) ^ ((d>>30)>>1))
                             BFIw(xFlags, x2, F_OF, 1);
                         MARK;
-                        LSRxw(x2, ed, rex.w?63:31);
-                        BFIw(xFlags, x2, F_CF, 1);
+                        BFXILxw(xFlags, ed, rex.w?63:31, 1);
                         UFLAG_DF(x2, d_none);
                     }
                     break;
@@ -2346,42 +2410,72 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 6:
                     INST_NAME("SHL Ed, CL");
                     SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
-                    if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                    UFLAG_IF {
+                        if(rex.w) {
+                            ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        } else {
+                            ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        if(rex.w) {
+                            ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        } else {
+                            ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
                     }
                     GETED(0);
-                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    UFLAG_IF {
+                        if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
+                        B_NEXT(cEQ);
+                    }
                     emit_shl32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACK;
                     break;
                 case 5:
                     INST_NAME("SHR Ed, CL");
                     SETFLAGS(X_ALL, SF_SET_PENDING);    // some flags are left undefined
-                    if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                    UFLAG_IF {
+                        if(rex.w) {
+                            ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        } else {
+                            ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        if(rex.w) {
+                            ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        } else {
+                            ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
                     }
                     GETED(0);
-                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    UFLAG_IF {
+                        if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
+                        B_NEXT(cEQ);
+                    }
                     emit_shr32(dyn, ninst, rex, ed, x3, x5, x4);
                     WBACK;
                     break;
                 case 7:
                     INST_NAME("SAR Ed, CL");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    if(rex.w) {
-                        ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                    UFLAG_IF {
+                        if(rex.w) {
+                            ANDSx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        } else {
+                            ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
                     } else {
-                        ANDSw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        if(rex.w) {
+                            ANDx_mask(x3, xRCX, 1, 0, 0b00101);  //mask=0x000000000000003f
+                        } else {
+                            ANDw_mask(x3, xRCX, 0, 0b00100);  //mask=0x00000001f
+                        }
                     }
                     GETED(0);
-                    if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
-                    B_NEXT(cEQ);
+                    UFLAG_IF {
+                        if(!rex.w && !rex.is32bits && MODREG) {MOVw_REG(ed, ed);}
+                        B_NEXT(cEQ);
+                    }
                     UFLAG_OP12(ed, x3);
                     ASRxw_REG(ed, ed, x3);
                     WBACK;
@@ -2661,29 +2755,29 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 3:
                     INST_NAME("NEG Eb");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
-                    GETEB(x1, 0);
+                    GETSEB(x1, 0);
                     emit_neg8(dyn, ninst, x1, x2, x4);
                     EBBACK;
                     break;
                 case 4:
                     INST_NAME("MUL AL, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x1, d_mul8);
                     GETEB(x1, 0);
                     UXTBw(x2, xRAX);
                     MULw(x1, x2, x1);
                     UFLAG_RES(x1);
                     BFIx(xRAX, x1, 0, 16);
+                    UFLAG_DF(x1, d_mul8);
                     break;
                 case 5:
                     INST_NAME("IMUL AL, Eb");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x1, d_imul8);
                     GETSEB(x1, 0);
                     SXTBw(x2, xRAX);
                     MULw(x1, x2, x1);
                     UFLAG_RES(x1);
                     BFIx(xRAX, x1, 0, 16);
+                    UFLAG_DF(x1, d_imul8);
                     break;
                 case 6:
                     INST_NAME("DIV Eb");
@@ -2730,7 +2824,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 4:
                     INST_NAME("MUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);
                     GETED(0);
                     if(rex.w) {
                         if(ed==xRDX) gd=x3; else gd=xRDX;
@@ -2744,11 +2837,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);
                     break;
                 case 5:
                     INST_NAME("IMUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
                     GETED(0);
                     if(rex.w) {
                         if(ed==xRDX) gd=x3; else gd=xRDX;
@@ -2762,6 +2855,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
                     break;
                 case 6:
                     INST_NAME("DIV Ed");
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 7b7cc338..48af11a0 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1333,17 +1333,13 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 ANDw_mask(x2, gd, 0, 0b00100);  //mask=0x00000001f

             }

-            LSRxw_REG(x4, ed, x2);

-            if(rex.w) {

-                ANDSx_mask(x4, x4, 1, 0, 0);  //mask=1

-            } else {

-                ANDSw_mask(x4, x4, 0, 0);  //mask=1

+            IFX(X_CF) {

+                LSRxw_REG(x4, ed, x2);

+                BFIw(xFlags, x4, F_CF, 1);

             }

-            BFIw(xFlags, x4, F_CF, 1);

             MOV32w(x4, 1);

             LSLxw_REG(x4, x4, x2);

-            EORxw_REG(x4, ed, x4);

-            CSELxw(ed, ed, x4, cNE);

+            ORRxw_REG(ed, ed, x4);

             if(wback) {

                 STRxw_U12(ed, wback, fixedaddress);

                 SMWRITE();

@@ -1468,8 +1464,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     UFLAG_RES(gd);

                     LSRx(x3, gd, 32);

                     UFLAG_OP1(x3);

-                    UFLAG_DF(x3, d_imul32);

                     MOVw_REG(gd, gd);

+                    UFLAG_DF(x3, d_imul32);

                 } else {

                     MULxw(gd, gd, ed);

                 }

@@ -1499,13 +1495,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 ANDw_mask(x2, gd, 0, 0b00100);  //mask=0x00000001f

             }

-            LSRxw_REG(x4, ed, x2);

-            if(rex.w) {

-                ANDx_mask(x4, x4, 1, 0, 0);  //mask=1

-            } else {

-                ANDw_mask(x4, x4, 0, 0);  //mask=1

+            IFX(X_CF) {

+                LSRxw_REG(x4, ed, x2);

+                BFIw(xFlags, x4, F_CF, 1);

             }

-            BFIw(xFlags, x4, F_CF, 1);

             MOV32w(x4, 1);

             LSLxw_REG(x4, x4, x2);

             BICxw_REG(ed, ed, x4);

@@ -1584,7 +1577,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }

                     u8 = F8;

                     u8&=(rex.w?0x3f:0x1f);

-                    BFXILxw(xFlags, ed, u8, 1);  // inject 1 bit from u8 to F_CF (i.e. pos 0)

+                    IFX(X_CF) {

+                        BFXILxw(xFlags, ed, u8, 1);  // inject 1 bit from u8 to F_CF (i.e. pos 0)

+                    }

                     MOV32w(x4, 1);

                     ORRxw_REG_LSL(ed, ed, x4, u8);

                     if(wback) {

@@ -1607,8 +1602,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }

                     u8 = F8;

                     u8&=(rex.w?0x3f:0x1f);

-                    BFXILxw(xFlags, ed, u8, 1);  // inject 1 bit from u8 to F_CF (i.e. pos 0)

-                    UBFXw(x4, xFlags, 0, 1);

+                    IFX(X_CF) {

+                        BFXILxw(xFlags, ed, u8, 1);  // inject 1 bit from u8 to F_CF (i.e. pos 0)

+                    }

                     MOV32w(x4, 1);

                     BICxw_REG_LSL(ed, ed, x4, u8);

                     if(wback) {

@@ -1631,7 +1627,9 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }

                     u8 = F8;

                     u8&=(rex.w?0x3f:0x1f);

-                    BFXILxw(xFlags, ed, u8, 1);  // inject 1 bit from u8 to F_CF (i.e. pos 0)

+                    IFX(X_CF) {

+                        BFXILxw(xFlags, ed, u8, 1);  // inject 1 bit from u8 to F_CF (i.e. pos 0)

+                    }

                     MOV32w(x4, 1);

                     EORxw_REG_LSL(ed, ed, x4, u8);

                     if(wback) {

@@ -1666,13 +1664,10 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             } else {

                 ANDw_mask(x2, gd, 0, 0b00100);  //mask=0x00000001f

             }

-            LSRxw_REG(x4, ed, x2);

-            if(rex.w) {

-                ANDx_mask(x4, x4, 1, 0, 0);  //mask=1

-            } else {

-                ANDw_mask(x4, x4, 0, 0);  //mask=1

+            IFX(X_CF) {

+                LSRxw_REG(x4, ed, x2);

+                BFIw(xFlags, x4, F_CF, 1);

             }

-            BFIw(xFlags, x4, F_CF, 1);

             MOV32w(x4, 1);

             LSLxw_REG(x4, x4, x2);

             EORxw_REG(ed, ed, x4);

diff --git a/src/dynarec/arm64/dynarec_arm64_64.c b/src/dynarec/arm64/dynarec_arm64_64.c
index 0bf2576b..9af908e2 100644
--- a/src/dynarec/arm64/dynarec_arm64_64.c
+++ b/src/dynarec/arm64/dynarec_arm64_64.c
@@ -233,8 +233,8 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                     UFLAG_RES(gd);
                                     LSRx(x3, gd, 32);
                                     UFLAG_OP1(x3);
-                                    UFLAG_DF(x3, d_imul32);
                                     MOVw_REG(gd, gd);
+                                    UFLAG_DF(x3, d_imul32);
                                 } else {
                                     MULxw(gd, gd, ed);
                                 }
@@ -966,7 +966,6 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 4:
                     INST_NAME("MUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);
                     GETEDO(x6, 0);
                     if(rex.w) {
                         if(ed==xRDX) gd=x3; else gd=xRDX;
@@ -980,11 +979,11 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);
                     break;
                 case 5:
                     INST_NAME("IMUL EAX, Ed");
                     SETFLAGS(X_ALL, SF_PENDING);
-                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
                     GETEDO(x6, 0);
                     if(rex.w) {
                         if(ed==xRDX) gd=x3; else gd=xRDX;
@@ -998,6 +997,7 @@ uintptr_t dynarec64_64(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     UFLAG_RES(xRAX);
                     UFLAG_OP1(xRDX);
+                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);
                     break;
                 case 6:
                     INST_NAME("DIV Ed");
diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index bd5eeb58..9e8fe773 100644
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -458,7 +458,6 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             }

             SETFLAGS(X_ALL, SF_PENDING);

             nextop = F8;

-            UFLAG_DF(x1, d_imul16);

             GETSEW(x1, (opcode==0x69)?2:1);

             if(opcode==0x69) i32 = F16S; else i32 = F8S;

             MOV32w(x2, i32);

@@ -466,6 +465,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             UFLAG_RES(x2);

             gd=x2;

             GWBACK;

+            UFLAG_DF(x1, d_imul16);

             break;

 

         case 0x70:

@@ -929,22 +929,22 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             switch((nextop>>3)&7) {

                 case 0:

                     INST_NAME("ROL Ew, Ib");

-                    MESSAGE(LOG_DUMP, "Need Optimization\n");

-                    SETFLAGS(X_OF|X_CF, SF_SET);

+                    if(geted_ib(dyn, addr, ninst, nextop)&15) {

+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);

+                    }

                     GETEW(x1, 1);

                     u8 = F8;

-                    MOV32w(x2, u8);

-                    CALL_(rol16, x1, x3);

+                    emit_rol16c(dyn, ninst, x1, u8&15, x4, x5);

                     EWBACK;

                     break;

                 case 1:

                     INST_NAME("ROR Ew, Ib");

-                    MESSAGE(LOG_DUMP, "Need Optimization\n");

-                    SETFLAGS(X_OF|X_CF, SF_SET);

+                    if(geted_ib(dyn, addr, ninst, nextop)&15) {

+                        SETFLAGS(X_OF|X_CF, SF_SUBSET_PENDING);

+                    }

                     GETEW(x1, 1);

                     u8 = F8;

-                    MOV32w(x2, u8);

-                    CALL_(ror16, x1, x3);

+                    emit_ror16c(dyn, ninst, x1, u8&15, x4, x5);

                     EWBACK;

                     break;

                 case 2:

@@ -1167,24 +1167,24 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 4:

                     INST_NAME("MUL AX, Ew");

                     SETFLAGS(X_ALL, SF_PENDING);

-                    UFLAG_DF(x1, d_mul16);

                     GETEW(x1, 0);

                     UXTHw(x2, xRAX);

                     MULw(x1, x2, x1);

                     UFLAG_RES(x1);

                     BFIx(xRAX, x1, 0, 16);

                     BFXILx(xRDX, x1, 16, 16);

+                    UFLAG_DF(x1, d_mul16);

                     break;

                 case 5:

                     INST_NAME("IMUL AX, Ew");

                     SETFLAGS(X_ALL, SF_PENDING);

-                    UFLAG_DF(x1, d_imul16);

                     GETSEW(x1, 0);

                     SXTHw(x2, xRAX);

                     MULw(x1, x2, x1);

                     UFLAG_RES(x1);

                     BFIx(xRAX, x1, 0, 16);

                     BFXILx(xRDX, x1, 16, 16);

+                    UFLAG_DF(x1, d_imul16);

                     break;

                 case 6:

                     INST_NAME("DIV Ew");

diff --git a/src/dynarec/arm64/dynarec_arm64_660f.c b/src/dynarec/arm64/dynarec_arm64_660f.c
index 174819e0..ba330878 100644
--- a/src/dynarec/arm64/dynarec_arm64_660f.c
+++ b/src/dynarec/arm64/dynarec_arm64_660f.c
@@ -2195,12 +2195,12 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
                 ed = x4;

             }

             ANDw_mask(x2, gd, 0, 0b000011);  // mask=0x0f

-            LSRw_REG(x1, ed, x2);

-            BFIw(xFlags, x1, F_CF, 1);

-            ANDSw_mask(x1, x1, 0, 0);  //mask=1

-            B_NEXT(cNE);

+            IFX(X_CF) {

+                LSRw_REG(x1, ed, x2);

+                BFIw(xFlags, x1, F_CF, 1);

+            }

             MOV32w(x1, 1);

-            LSLxw_REG(x1, x1, x2);

+            LSLw_REG(x1, x1, x2);

             ORRx_REG(ed, ed, x1);

             if(wback) {

                 STRH_U12(ed, wback, fixedaddress);

@@ -2231,12 +2231,12 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             INST_NAME("IMUL Gw,Ew");

             SETFLAGS(X_ALL, SF_PENDING);

             nextop = F8;

-            UFLAG_DF(x1, d_imul16);

             GETSEW(x1, 0);

             GETSGW(x2);

             MULw(x2, x2, x1);

             UFLAG_RES(x2);

             GWBACK;

+            UFLAG_DF(x1, d_imul16);

             break;

 

         case 0xB3:

@@ -2452,6 +2452,16 @@ uintptr_t dynarec64_660F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int n
             BFIx(gd, x1, 0, 16);

             break;

 

+        case 0xC1:

+            INST_NAME("XADD Gw, Ew");

+            SETFLAGS(X_ALL, SF_SET_PENDING);

+            nextop = F8;

+            GETGW(x1);

+            GETEW(x2, 0);

+            BFIx(xRAX+((nextop&0x38)>>3)+(rex.r<<3), ed, 0, 16);

+            emit_add16(dyn, ninst, ed, gd, x4, x5);

+            EWBACK;

+            break;

         case 0xC2:

             INST_NAME("CMPPD Gx, Ex, Ib");

             nextop = F8;

diff --git a/src/dynarec/arm64/dynarec_arm64_67.c b/src/dynarec/arm64/dynarec_arm64_67.c
index 7c4ea045..5cc96deb 100644
--- a/src/dynarec/arm64/dynarec_arm64_67.c
+++ b/src/dynarec/arm64/dynarec_arm64_67.c
@@ -1008,7 +1008,6 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 case 4:

                     INST_NAME("MUL EAX, Ed");

                     SETFLAGS(X_ALL, SF_PENDING);

-                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);

                     GETED32(0);

                     if(rex.w) {

                         if(ed==xRDX) gd=x3; else gd=xRDX;

@@ -1022,11 +1021,11 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }

                     UFLAG_RES(xRAX);

                     UFLAG_OP1(xRDX);

+                    UFLAG_DF(x2, rex.w?d_mul64:d_mul32);

                     break;

                 case 5:

                     INST_NAME("IMUL EAX, Ed");

                     SETFLAGS(X_ALL, SF_PENDING);

-                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);

                     GETED32(0);

                     if(rex.w) {

                         if(ed==xRDX) gd=x3; else gd=xRDX;

@@ -1040,6 +1039,7 @@ uintptr_t dynarec64_67(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }

                     UFLAG_RES(xRAX);

                     UFLAG_OP1(xRDX);

+                    UFLAG_DF(x2, rex.w?d_imul64:d_imul32);

                     break;

                 case 6:

                     INST_NAME("DIV Ed");

diff --git a/src/dynarec/arm64/dynarec_arm64_emit_math.c b/src/dynarec/arm64/dynarec_arm64_emit_math.c
index d23dde0f..3a7cc285 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_math.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_math.c
@@ -363,7 +363,7 @@ void emit_add8c(dynarec_arm_t* dyn, int ninst, int s1, int c, int s3, int s4)
         SET_DFNONE(s3);
     }
     IFX(X_AF | X_OF) {
-        if(X_PEND) {} else {MOV32w(s4, c&0xff);}
+        IFX(X_PEND) {} else {MOV32w(s4, c&0xff);}
         ORRw_REG(s3, s1, s4);       // s3 = op1 | op2
         ANDw_REG(s4, s1, s4);       // s4 = op1 & op2
     }
@@ -1766,10 +1766,9 @@ void emit_neg8(dynarec_arm_t* dyn, int ninst, int s1, int s3, int s4)
     IFX(X_AF|X_OF) {
         MOVw_REG(s3, s1);
     }
+    NEGw_REG(s1, s1);
     IFX(X_ZF) {
-        NEGSw_REG(s1, s1);
-    } else {
-        NEGw_REG(s1, s1);
+        ANDSw_mask(s1, s1, 0, 7);   // mask 0xff
     }
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index 7c4a0bf4..485489a4 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -154,6 +154,12 @@ void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         LSRxw_REG(s3, s1, s3);
         BFIw(xFlags, s3, 0, 1);
     }
+    IFX(X_OF) {
+        CMPSxw_U12(s2, 1);   // if s2==1
+            Bcond(cNE, 4+2*4);
+            LSRxw(s4, s1, rex.w?63:31);
+            BFIw(xFlags, s4, F_OF, 1);
+    }
     LSRxw_REG(s1, s1, s2);
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
@@ -167,13 +173,6 @@ void emit_shr32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         LSRxw(s4, s1, (rex.w)?63:31);
         BFIx(xFlags, s4, F_SF, 1);
     }
-    IFX(X_OF) {
-        CMPSxw_U12(s2, 1);   // if s2==1
-            Bcond(cNE, 4+3*4);
-            LSRxw(s4, s1, rex.w?62:30);
-            EORw_REG_LSR(s4, s4, s4, 1);
-            BFIw(xFlags, s4, F_OF, 1);
-    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -197,8 +196,18 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         return;
     }
     IFX(X_CF) {
-        LSRxw(s3, s1, c-1);
-        BFIw(xFlags, s3, 0, 1);
+        if(c==1) {
+            BFIw(xFlags, s1, 0, 1);
+        } else {
+            LSRxw(s3, s1, c-1);
+            BFIw(xFlags, s3, 0, 1);
+        }
+    }
+    IFX(X_OF) {
+        if(c==1) {
+            LSRxw(s4, s1, rex.w?63:31);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     LSRxw(s1, s1, c);
     IFX(X_PEND) {
@@ -213,13 +222,6 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         LSRxw(s4, s1, (rex.w)?63:31);
         BFIx(xFlags, s4, F_SF, 1);
     }
-    IFX(X_OF) {
-        if(c==1) {
-            LSRxw(s4, s1, rex.w?62:30);
-            EORw_REG_LSR(s4, s4, s4, 1);
-            BFIw(xFlags, s4, F_OF, 1);
-        }
-    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -259,6 +261,10 @@ void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         LSRxw(s4, s1, (rex.w)?63:31);
         BFIx(xFlags, s4, F_SF, 1);
     }
+    IFX(X_OF) {
+        // OF is only touched for a shift count of 1: clear the F_OF bit in xFlags
+        if(c==1) { BFCw(xFlags, F_OF, 1); }
+    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
@@ -318,8 +324,7 @@ void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_CF) {
-        LSRxw(s3, s1, rex.w?63:31);
-        BFIw(xFlags, s3, F_CF, 1);
+        BFXILxw(xFlags, s1, rex.w?63:31, 1);
     }
     IFX(X_OF) {
         if(c==1) {
@@ -330,6 +335,142 @@ void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
     }
 }
 
+// Emit an 8-bit ROL of register s1 by the constant c; the result is left in s1, with s3 and s4 used as scratch registers
+void emit_rol8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+    IFX(X_PEND) {   // deferred flags requested: save the count in emu->op2 and tag df as d_rol8
+        MOV32w(s3, c);
+        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_rol8);
+    } else IFX(X_ALL) {   // some flag is generated inline below: make sure df is marked as "none"
+        SET_DFNONE(s4);
+    }
+    if(!c) {   // count of 0: s1 and xFlags are left untouched, only res is published when flags are deferred
+        IFX(X_PEND) {
+            STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    int rc = 8-(c&7);   // a left rotate by c&7 is done as a right shift by 8-(c&7) of the doubled byte
+    ORRw_REG_LSL(s1, s1, s1, 8);   // s1 |= s1<<8 (assumes s1 holds a zero-extended byte on entry -- TODO confirm)
+    LSRw(s1, s1, rc);   // rotated byte is now in the low 8 bits; bits above may hold leftovers of the doubled value
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));   // byte store: only the low 8 bits reach emu->res
+    }
+    IFX(X_CF) {
+        BFIw(xFlags, s1, F_CF, 1);   // CF = bit 0 of the result
+    }
+    IFX(X_OF) {
+        if(c==1) {   // OF is only produced for a count of 1
+            EORw_REG_LSR(s3, s1, s1, 7);   // bit 0 of s3 = result bit 0 XOR result bit 7
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+
+// Emit an 8-bit ROR of register s1 by the constant c; the result is left in s1, with s3 and s4 used as scratch registers
+void emit_ror8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+    IFX(X_PEND) {   // deferred flags requested: save the count in emu->op2 and tag df as d_ror8
+        MOV32w(s3, c);
+        STRB_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_ror8);
+    } else IFX(X_ALL) {   // some flag is generated inline below: make sure df is marked as "none"
+        SET_DFNONE(s4);
+    }
+    if(!c) {   // count of 0: s1 and xFlags are left untouched, only res is published when flags are deferred
+        IFX(X_PEND) {
+            STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    ORRw_REG_LSL(s1, s1, s1, 8);   // s1 |= s1<<8 (assumes s1 holds a zero-extended byte on entry -- TODO confirm)
+    LSRw(s1, s1, c&7);   // rotated byte is now in the low 8 bits; bits above may hold leftovers of the doubled value
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));   // byte store: only the low 8 bits reach emu->res
+    }
+    IFX(X_CF) {
+        BFXILw(xFlags, s1, 7, 1);   // CF = bit 7 of the result (presumably relies on F_CF being bit 0 of xFlags)
+    }
+    IFX(X_OF) {
+        if(c==1) {   // OF is only produced for a count of 1
+            LSRw(s3, s1, 6);   // bring result bits 6 and 7 down to bits 0 and 1
+            EORw_REG_LSR(s3, s3, s3, 1);   // bit 0 of s3 = result bit 6 XOR result bit 7
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+
+// Emit a 16-bit ROL of register s1 by the constant c; the result is left in s1, with s3 and s4 used as scratch registers
+void emit_rol16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+    IFX(X_PEND) {   // deferred flags requested: save the count in emu->op2 and tag df as d_rol16
+        MOV32w(s3, c);
+        STRH_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_rol16);
+    } else IFX(X_ALL) {   // some flag is generated inline below: make sure df is marked as "none"
+        SET_DFNONE(s4);
+    }
+    if(!c) {   // count of 0: s1 and xFlags are left untouched, only res is published when flags are deferred
+        IFX(X_PEND) {
+            STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    int rc = 16-(c&15);   // a left rotate by c&15 is done as a right shift by 16-(c&15) of the doubled halfword
+    ORRw_REG_LSL(s1, s1, s1, 16);   // s1 |= s1<<16 (assumes s1 holds a zero-extended 16-bit value on entry -- TODO confirm)
+    LSRw(s1, s1, rc);   // rotated value is now in the low 16 bits; bits above may hold leftovers of the doubled value
+    IFX(X_PEND) {
+        STRH_U12(s1, xEmu, offsetof(x64emu_t, res));   // halfword store: only the low 16 bits reach emu->res
+    }
+    IFX(X_CF) {
+        BFIw(xFlags, s1, F_CF, 1);   // CF = bit 0 of the result
+    }
+    IFX(X_OF) {
+        if(c==1) {   // OF is only produced for a count of 1
+            EORw_REG_LSR(s3, s1, s1, 15);   // bit 0 of s3 = result bit 0 XOR result bit 15
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+
+// Emit a 16-bit ROR of register s1 by the constant c; the result is left in s1, with s3 and s4 used as scratch registers
+void emit_ror16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4)
+{
+    MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
+    IFX(X_PEND) {   // deferred flags requested: save the count in emu->op2 and tag df as d_ror16
+        MOV32w(s3, c);
+        STRH_U12(s3, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_ror16);
+    } else IFX(X_ALL) {   // some flag is generated inline below: make sure df is marked as "none"
+        SET_DFNONE(s4);
+    }
+    if(!c) {   // count of 0: s1 and xFlags are left untouched, only res is published when flags are deferred
+        IFX(X_PEND) {
+            STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+    ORRw_REG_LSL(s1, s1, s1, 16);   // s1 |= s1<<16 (assumes s1 holds a zero-extended 16-bit value on entry -- TODO confirm)
+    LSRw(s1, s1, c&15);   // rotated value is now in the low 16 bits; bits above may hold leftovers of the doubled value
+    IFX(X_PEND) {
+        STRH_U12(s1, xEmu, offsetof(x64emu_t, res));   // halfword store: only the low 16 bits reach emu->res
+    }
+    IFX(X_CF) {
+        BFXILw(xFlags, s1, 15, 1);   // CF = bit 15 of the result (presumably relies on F_CF being bit 0 of xFlags)
+    }
+    IFX(X_OF) {
+        if(c==1) {   // OF is only produced for a count of 1
+            LSRw(s3, s1, 14);   // bring result bits 14 and 15 down to bits 0 and 1
+            EORw_REG_LSR(s3, s3, s3, 1);   // bit 0 of s3 = result bit 14 XOR result bit 15
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    }
+}
+
 // emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
 {
diff --git a/src/dynarec/arm64/dynarec_arm64_helper.h b/src/dynarec/arm64/dynarec_arm64_helper.h
index cdf69a72..1c0fc352 100644
--- a/src/dynarec/arm64/dynarec_arm64_helper.h
+++ b/src/dynarec/arm64/dynarec_arm64_helper.h
@@ -787,7 +787,17 @@
     x87_do_pop(dyn, ninst, scratch);
 
 #define SET_DFNONE(S)    if(!dyn->f.dfnone) {STRw_U12(wZR, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=1;}
-#define SET_DF(S, N)     if((N)!=d_none) {MOVZw(S, (N)); STRw_U12(S, xEmu, offsetof(x64emu_t, df)); dyn->f.dfnone=0;} else SET_DFNONE(S)
+#define SET_DF(S, N)        \
+    if((N)!=d_none) {       \
+        MOVZw(S, (N));      \
+        STRw_U12(S, xEmu, offsetof(x64emu_t, df)); /* write the deferred-flags kind N to emu->df, using S as scratch */ \
+        if(dyn->f.pending==SF_PENDING && dyn->insts[ninst].x64.need_after && !(dyn->insts[ninst].x64.need_after&X_PEND)) {  \
+            CALL_(UpdateFlags, -1, 0);              /* NOTE(review): taken when need_after is non-zero but lacks X_PEND; presumably materializes the flags right away -- confirm */ \
+            dyn->f.pending = SF_SET;                \
+            SET_NODF();     \
+        }                   \
+        dyn->f.dfnone=0;    /* df no longer known to be d_none */ \
+    } else SET_DFNONE(S)
 #define SET_NODF()          dyn->f.dfnone = 0
 #define SET_DFOK()          dyn->f.dfnone = 1
 
@@ -1015,6 +1025,10 @@ void* arm64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_sar32c     STEPNAME(emit_sar32c)
 #define emit_rol32c     STEPNAME(emit_rol32c)
 #define emit_ror32c     STEPNAME(emit_ror32c)
+#define emit_rol8c      STEPNAME(emit_rol8c)
+#define emit_ror8c      STEPNAME(emit_ror8c)
+#define emit_rol16c     STEPNAME(emit_rol16c)
+#define emit_ror16c     STEPNAME(emit_ror16c)
 #define emit_shrd32c    STEPNAME(emit_shrd32c)
 #define emit_shld32c    STEPNAME(emit_shld32c)
 
@@ -1144,6 +1158,10 @@ void emit_shr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
 void emit_sar32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_rol8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
+void emit_ror8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
+void emit_rol16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
+void emit_ror16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s4);
 void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 
diff --git a/src/dynarec/dynarec_native_functions.c b/src/dynarec/dynarec_native_functions.c
index 434fbd8a..d70beca9 100644
--- a/src/dynarec/dynarec_native_functions.c
+++ b/src/dynarec/dynarec_native_functions.c
@@ -474,6 +474,12 @@ uintptr_t fakeed(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nexto
     }
     return addr;
 }
+// Return the Ib immediate byte that follows a mod/rm operand, without emitting anything
+uint8_t geted_ib(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop)
+{
+    addr = fakeed(dyn, addr, ninst, nextop);   // fakeed decodes the operand without emitting and returns the updated address
+    return F8;   // F8 is a file-local macro (#undef'd right below); presumably fetches the byte at addr -- confirm
+}
 #undef F8
 
 int isNativeCall(dynarec_native_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn)
diff --git a/src/dynarec/dynarec_native_functions.h b/src/dynarec/dynarec_native_functions.h
index ed6d0f74..06585c4e 100644
--- a/src/dynarec/dynarec_native_functions.h
+++ b/src/dynarec/dynarec_native_functions.h
@@ -57,6 +57,8 @@ int getNominalPred(dynarec_native_t* dyn, int ninst);
 
 // Do the GETED, but don't emit anything...
 uintptr_t fakeed(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop);
+// return Ib on a mod/rm opcode without emitting anything
+uint8_t geted_ib(dynarec_native_t* dyn, uintptr_t addr, int ninst, uint8_t nextop);
 
 // Is what pointed at addr a native call? And if yes, to what function?
 int isNativeCall(dynarec_native_t* dyn, uintptr_t addr, uintptr_t* calladdress, int* retn);