about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_00.c 18
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 8
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_66.c 49
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_emit_shift.c 376
4 files changed, 212 insertions, 239 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 1bb39f19..8ccf25ba 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -2784,7 +2784,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
                     if(BOX64DRENV(dynarec_safeflags)>1)
                         MAYSETFLAGS();
-                    SET_DFNONE();
                     UFLAG_IF {
                         ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                         CBZw_NEXT(x2);
@@ -2793,10 +2792,15 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MOV32w(x4, 8);
                     SUBx_REG(x2, x4, x2);
                     GETEB(x1, 0);
+                    IFX2(X_OF, && !BOX64ENV(cputype)) {
+                        LSRw(x4, ed, 6);
+                        EORw_REG_LSR(x4, x4, x4, 1);
+                        BFIw(xFlags, x4, F_OF, 1);
+                    }
                     ORRw_REG_LSL(ed, ed, ed, 8);
                     LSRw_REG(ed, ed, x2);
                     EBBACK;
-                    IFX(X_OF) {
+                    IFX2(X_OF, && BOX64ENV(cputype)) {
                         EORxw_REG_LSR(x3, ed, ed, 7);
                         BFIw(xFlags, x3, F_OF, 1);
                     }
@@ -2809,7 +2813,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
                     if(BOX64DRENV(dynarec_safeflags)>1)
                         MAYSETFLAGS();
-                    SET_DFNONE();
                     UFLAG_IF {
                         ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                         CBZw_NEXT(x2);
@@ -2838,7 +2841,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
                     if(BOX64DRENV(dynarec_safeflags)>1)
                         MAYSETFLAGS();
-                    SET_DFNONE();
                     ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                     // get CL % 9
                     MOV32w(x3, 0x1c72); // 0x10000 / 9 + 1 (this is precise enough in the 0..31 range)
@@ -2848,6 +2850,11 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MSUBw(x2, x3, x4, x2);  // CL mod 9
                     CBZw_NEXT(x2);
                     GETEB(x1, 0);
+                    IFX2(X_OF, && !BOX64ENV(cputype)) {
+                        LSRw(x5, ed, 6);
+                        EORw_REG_LSR(x5, x5, x5, 1);
+                        BFIw(xFlags, x5, F_OF, 1);
+                    }
                     BFIw(ed, xFlags, 8, 1); // insert CF
                     ORRw_REG_LSL(ed, ed, ed, 9);    // insert rest of ed
                     SUBw_REG(x2, x4, x2);
@@ -2857,7 +2864,7 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     LSRw_REG(ed, ed, x2);
                     EBBACK;
-                    IFX(X_OF) {
+                    IFX2(X_OF, && BOX64ENV(cputype)) {
                         EORw_REG_LSR(x2, x5, ed, 7);
                         BFIw(xFlags, x2, F_OF, 1);
                     }
@@ -2871,7 +2878,6 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
                     if(BOX64DRENV(dynarec_safeflags)>1)
                         MAYSETFLAGS();
-                    SET_DFNONE();
                     ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
                     // get CL % 9
                     MOV32w(x3, 0x1c72); // 0x10000 / 9 + 1
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index ca43093f..a157a9a4 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -2322,7 +2322,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;

         case 0xBC:

             INST_NAME("BSF Gd, Ed");

-            SETFLAGS(X_ZF, SF_SET_DF);

+            SETFLAGS(X_ZF, SF_SET);

             SET_DFNONE();

             nextop = F8;

             GETED(0);

@@ -2345,7 +2345,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             break;

         case 0xBD:

             INST_NAME("BSR Gd, Ed");

-            SETFLAGS(X_ZF, SF_SET_DF);

+            SETFLAGS(X_ZF, SF_SET);

             SET_DFNONE();

             nextop = F8;

             GETED(0);

@@ -2550,7 +2550,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             if(MODREG) switch((nextop>>3)&7) {

             case 6:

                 INST_NAME("RDRAND Ed");

-                SETFLAGS(X_ALL, SF_SET_DF);

+                SETFLAGS(X_ALL, SF_SET);

                 SET_DFNONE();

                 GETED(0);

                 IFX(X_OF|X_SF|X_ZF|X_PF|X_AF) {

@@ -2610,7 +2610,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 break;

             case 6:

                 INST_NAME("RDRAND Ed");

-                SETFLAGS(X_ALL, SF_SET_DF);

+                SETFLAGS(X_ALL, SF_SET);

                 SET_DFNONE();

                 IFX(X_OF|X_SF|X_ZF|X_PF|X_AF) {

                     MOV32w(x1, (1<<F_OF)|(1<<F_SF)|(1<<F_ZF)|(1<<F_PF)|(1<<F_AF));

diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index 74b19c31..46cf8c90 100644
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -446,7 +446,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             GWBACK;

             SET_DFNONE();

             IFX(X_CF|X_OF) {

-                ASRw(x1, x2, 16);

+                ASRw(x1, x2, 15);

                 CMPSw_REG_ASR(x1, x2, 31);

                 CSETw(x3, cNE);

                 IFX(X_CF) {

@@ -459,7 +459,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             IFX(X_AF) {BFCw(xFlags, F_AF, 1);}

             IFX(X_ZF) {BFCw(xFlags, F_ZF, 1);}

             IFX(X_SF) {

-                LSRxw(x3, x2, 15);

+                LSRw(x3, x2, 15);

                 BFIw(xFlags, x3, F_SF, 1);

             }

             IFX(X_PF) emit_pf(dyn, ninst, x2, x3);

@@ -1028,7 +1028,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             switch((nextop>>3)&7) {

                 case 0:

                     INST_NAME("ROL Ew, Ib");

-                    u8 = geted_ib(dyn, addr, ninst, nextop) & 15;

+                    u8 = geted_ib(dyn, addr, ninst, nextop) & 0x1f;

                     if (u8) {

                         SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose

                         GETEW(x1, 1);

@@ -1042,7 +1042,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;

                 case 1:

                     INST_NAME("ROR Ew, Ib");

-                    if (geted_ib(dyn, addr, ninst, nextop) & 15) {

+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {

                         SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose

                         GETEW(x1, 1);

                         u8 = (F8)&0x1f;

@@ -1055,7 +1055,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;

                 case 2:

                     INST_NAME("RCL Ew, Ib");

-                    if (geted_ib(dyn, addr, ninst, nextop) & 31) {

+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {

                         READFLAGS(X_CF);

                         SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose

                         GETEW(x1, 1);

@@ -1069,7 +1069,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     break;

                 case 3:

                     INST_NAME("RCR Ew, Ib");

-                    if (geted_ib(dyn, addr, ninst, nextop) & 31) {

+                    if (geted_ib(dyn, addr, ninst, nextop) & 0x1f) {

                         READFLAGS(X_CF);

                         SETFLAGS(X_OF|X_CF, SF_SUBSET); // removed PENDING on purpose

                         GETEW(x1, 1);

@@ -1222,17 +1222,20 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MOV32w(x4, 16);

                     SUBx_REG(x2, x4, x2);

                     GETEW(x1, 0);

+                    IFX2(X_OF, && !BOX64ENV(cputype)) {

+                        LSRw(x4, ed, 14);

+                        EORw_REG_LSR(x4, x4, x4, 1);

+                        BFIw(xFlags, x4, F_OF, 1);

+                    }

                     ORRw_REG_LSL(ed, ed, ed, 16);

                     LSRw_REG(ed, ed, x2);

                     EWBACK;

-                    UFLAG_IF {  // calculate flags directly

-                        SUBw_U12(x2, x2, 15);

-                        CBNZw_MARK(x2);

-                            EORw_REG_LSR(x3, ed, ed, 15);

-                            BFIw(xFlags, x3, F_OF, 1);

-                        MARK;

+                    IFX2(X_OF, && BOX64ENV(cputype)) {

+                        EORxw_REG_LSR(x3, ed, ed, 15);

+                        BFIw(xFlags, x3, F_OF, 1);

+                    }

+                    IFX(X_CF) {

                         BFIw(xFlags, ed, F_CF, 1);

-                        UFLAG_DF(x2, d_none);

                     }

                     break;

                 case 1:

@@ -1246,18 +1249,20 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }

                     ANDw_mask(x2, xRCX, 0, 0b00011);  //mask=0x00000000f

                     GETEW(x1, 0);

+                    IFX2(X_OF, && !BOX64ENV(cputype)) {

+                        EORw_REG_LSR(x4, ed, ed, 15);

+                        BFIw(xFlags, x4, F_OF, 1);

+                    }

                     ORRw_REG_LSL(ed, ed, ed, 16);

                     LSRw_REG(ed, ed, x2);

                     EWBACK;

-                    UFLAG_IF {  // calculate flags directly

-                        SUBw_U12(x2, x2, 1);

-                        CBNZw_MARK(x2);

-                            LSRxw(x2, ed, 14); // x2 = d>>14

-                            EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>14) ^ ((d>>14)>>1))

-                            BFIw(xFlags, x2, F_OF, 1);

-                        MARK;

+                    IFX2(X_OF, && BOX64ENV(cputype)) {

+                        LSRxw(x2, ed, 14); // x2 = d>>14

+                        EORw_REG_LSR(x2, x2, x2, 1); // x2 = ((d>>14) ^ ((d>>14)>>1))

+                        BFIw(xFlags, x2, F_OF, 1);

+                    }

+                    IFX(X_CF) {

                         BFXILw(xFlags, ed, 15, 1);

-                        UFLAG_DF(x2, d_none);

                     }

                     break;

                 case 2:

@@ -1447,7 +1452,7 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     BFXILx(xRDX, x1, 16, 16);

                     SET_DFNONE();

                     IFX(X_CF|X_OF) {

-                        ASRw(x2, x1, 16);

+                        ASRw(x2, x1, 15);

                         CMPSw_REG_ASR(x2, x1, 31);

                         CSETw(x3, cNE);

                         IFX(X_CF) {

diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index 6637ca51..7a290e54 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -59,19 +59,17 @@ void emit_shl32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    if(BOX64ENV(cputype))
-        IFX(X_OF) {
-            LSRxw(s4, s1, (rex.w)?63:31);
-            EORxw_REG(s3, s4, xFlags);  // CF is set if OF is asked
-            BFIw(xFlags, s3, F_OF, 1);
-        }
+    IFX2(X_OF, && BOX64ENV(cputype)) {
+        EORxw_REG_LSR(s3, xFlags, s1, rex.w?63:31);  // CF is set if OF is asked
+        BFIw(xFlags, s3, F_OF, 1);
+    }
     int need_tst = 0;
     IFX(X_ZF) need_tst = 1;
     IFXNATIVE(X_SF, NF_SF) need_tst = 1;
     if(need_tst) TSTxw_REG(s1, s1);
     IFX(X_SF) {
         IFNATIVE(NF_SF) {} else {
-            IFX2(X_OF, && BOX64ENV(cputype)) {} else {LSRxw(s4, s1, (rex.w)?63:31);}
+            LSRxw(s4, s1, (rex.w)?63:31);
             BFIw(xFlags, s4, F_SF, 1);
         }
     }
@@ -427,69 +425,38 @@ void emit_shl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
     } else {
         SET_DFNONE();
     }
-    if(c<8) {
-	    if(BOX64ENV(cputype)) {
-            IFX(X_CF|X_OF) {
-                BFXILw(xFlags, s1, 8-c, 1);
-            }
-        } else {
-            IFX(X_OF) {
-                LSRw(s4, s1, 6);
-                EORw_REG_LSR(s4, s4, s4, 1);
-                BFIw(xFlags, s4, F_OF, 1);
-            }
-            IFX(X_CF) {
-                BFXILw(xFlags, s1, 8-c, 1);
-            }
-        }
-        LSLw(s1, s1, c);
-
-        IFX(X_PEND) {
-            STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
-        }
-        COMP_ZFSF(s1, 8)
-        if(BOX64ENV(cputype))
-            IFX(X_OF) {
-                IFX2(X_SF, && !arm64_flagm) {} else {LSRw(s3, s1, 7);}  //use COMP_ZFSF operation
-                EORw_REG(s4, s3, xFlags);  // CF is set if OF is asked
-                BFIw(xFlags, s4, F_OF, 1);
-            }
-        IFX (X_AF) {
-            BFCw(xFlags, F_AF, 1);
-        }
-        IFX(X_PF) {
-            emit_pf(dyn, ninst, s1, s4);
+    if(BOX64ENV(cputype)) {
+        IFX(X_CF|X_OF) {
+            BFXILw(xFlags, s1, 8-(c&7), 1);
         }
     } else {
-        IFXNATIVE(X_ZF|X_SF|X_CF, NF_EQ|NF_SF|NF_CF) {
-            SUBSw_REG(s1, s1, s1);
-        } else {
-            MOVw_REG(s1, xZR);
-        }
-        IFX(X_PEND) {
-            STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+        IFX(X_OF) {
+            LSRw(s4, s1, 6);
+            EORw_REG_LSR(s4, s4, s4, 1);
+            BFIw(xFlags, s4, F_OF, 1);
         }
+    }
+    LSLw(s1, s1, c);
+    if(!BOX64ENV(cputype))
         IFX(X_CF) {
-            // sub X, X will generate a carry flags
-            IFNATIVE(NF_CF) {GEN_INVERTED_CARRY();} else {BFCw(xFlags, F_CF, 1);}
+            BFXILw(xFlags, s1, 8, 1);
         }
+
+    IFX(X_PEND) {
+        STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    COMP_ZFSF(s1, 8)
+    if(BOX64ENV(cputype))
         IFX(X_OF) {
-            IFNATIVE(NF_VF) {} else BFCw(xFlags, F_OF, 1);
-        }
-        IFX(X_SF) {
-            IFNATIVE(NF_SF) {} else BFCw(xFlags, F_SF, 1);
-        }
-        IFX (X_AF) {
-            BFCw(xFlags, F_AF, 1);
-        }
-        IFX(X_ZF) {
-            IFNATIVE(NF_EQ) {} else {
-                ORRw_mask(xFlags, xFlags, 26, 0);   //1<<F_ZF
-            }
-        }
-        IFX(X_PF) {
-            ORRw_mask(xFlags, xFlags, 30, 0);   //1<<F_PF
+            IFX2(X_SF, && !arm64_flagm) {} else {LSRw(s3, s1, 7);}  //use COMP_ZFSF operation
+            EORw_REG(s4, s3, xFlags);  // CF is set if OF is asked
+            BFIw(xFlags, s4, F_OF, 1);
         }
+    IFX (X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s4);
     }
 }
 
@@ -707,75 +674,42 @@ void emit_shl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
     } else {
         SET_DFNONE();
     }
-    if(c<16) {
-        if(BOX64ENV(cputype)) {
-            IFX(X_CF|X_OF) {
-                LSRw(s3, s1, 16-c);
-                BFIw(xFlags, s3, F_CF, 1);
-            }
-        } else {
-            IFX(F_OF) {
-                LSRw(s4, s1, 14);
-                EORw_REG_LSR(s4, s4, s4, 1);
-                BFIw(xFlags, s4, F_OF, 1);
-            }
-            IFX(X_CF) {
-                LSRw(s3, s1, 16-c);
-                BFIw(xFlags, s3, F_CF, 1);
-            }
-        }
-        LSLw(s1, s1, c);
-
-        IFX(X_PEND) {
-            STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
-        }
-        COMP_ZFSF(s1, 16)
-        if(BOX64ENV(cputype))
-            IFX(X_OF) {
-                IFX2(X_SF, && !arm64_flagm) {} else {LSRw(s3, s1, 15);} // use COMP_ZFSF operation
-                EORw_REG(s4, s3, xFlags);  // CF is set if OF is asked
-                BFIw(xFlags, s4, F_OF, 1);
-            }
-        IFX (X_AF) {
-            BFCw(xFlags, F_AF, 1);
-        }
-        IFX(X_PF) {
-            if(c>7) {
-                // the 0xff area will be 0, so PF is known
-                ORRw_mask(xFlags, xFlags, 30, 0);   //1<<F_PF
-            } else
-                emit_pf(dyn, ninst, s1, s4);
+    if(BOX64ENV(cputype)) {
+        IFX(X_CF|X_OF) {
+            LSRw(s3, s1, 16-c);
+            BFIw(xFlags, s3, F_CF, 1);
         }
     } else {
-        IFXNATIVE(X_ZF|X_SF|X_CF, NF_EQ|NF_SF|NF_CF) {
-            SUBSw_REG(s1, s1, s1);
-        } else {
-            MOVw_REG(s1, xZR);
-        }
-        IFX(X_PEND) {
-            STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
-        }
-        IFX(X_OF) {
-            BFCw(xFlags, F_OF, 1);
+        IFX(F_OF) {
+            LSRw(s4, s1, 14);
+            EORw_REG_LSR(s4, s4, s4, 1);
+            BFIw(xFlags, s4, F_OF, 1);
         }
+    }
+    LSLw(s1, s1, c);
+    if(!BOX64ENV(cputype))
         IFX(X_CF) {
-            // sub X, X will generate a carry flags
-            IFNATIVE(NF_CF) {GEN_INVERTED_CARRY();} else {BFCw(xFlags, F_CF, 1);}
-        }
-        IFX(X_SF) {
-            IFNATIVE(NF_SF) {} else BFCw(xFlags, F_SF, 1);
+            BFXILw(xFlags, s1, 16, 1);
         }
-        IFX (X_AF) {
-            BFCw(xFlags, F_AF, 1);
-        }
-        IFX(X_ZF) {
-            IFNATIVE(NF_EQ) {} else {
-                ORRw_mask(xFlags, xFlags, 26, 0);   //1<<F_ZF
-            }
+
+    IFX(X_PEND) {
+        STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    COMP_ZFSF(s1, 16)
+    if(BOX64ENV(cputype))
+        IFX(X_OF) {
+            EORw_REG_LSR(s4, xFlags, s1, 15);  // CF is set if OF is asked
+            BFIw(xFlags, s4, F_OF, 1);
         }
-        IFX(X_PF) {
+    IFX (X_AF) {
+        BFCw(xFlags, F_AF, 1);
+    }
+    IFX(X_PF) {
+        if(c>7) {
+            // the 0xff area will be 0, so PF is known
             ORRw_mask(xFlags, xFlags, 30, 0);   //1<<F_PF
-        }
+        } else
+            emit_pf(dyn, ninst, s1, s4);
     }
 }
 
@@ -919,16 +853,22 @@ void emit_rol32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
 
     if (!c) return;
 
-    SET_DFNONE();
+    if(!BOX64ENV(cputype))
+        IFX(X_OF) {
+            LSRxw(s3, s1, rex.w?62:30);
+            EORw_REG_LSR(s3, s3, s3, 1);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
 
     RORxw(s1, s1, (rex.w?64:32)-c);
     IFX(X_CF) {
         BFIw(xFlags, s1, F_CF, 1);
     }
-    IFX(X_OF) {
-        EORxw_REG_LSR(s3, s1, s1, rex.w?63:31);
-        BFIw(xFlags, s3, F_OF, 1);
-    }
+    if(BOX64ENV(cputype))
+        IFX(X_OF) {
+            EORxw_REG_LSR(s3, s1, s1, rex.w?63:31);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
 }
 
 // emit ROR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
@@ -938,8 +878,6 @@ void emit_ror32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
 
     if (!c) return;
 
-    SET_DFNONE();
-
     if(!BOX64ENV(cputype))
         IFX(X_OF) {
             EORxw_REG_LSR(s3, s1, s1, rex.w?63:31);
@@ -965,8 +903,11 @@ void emit_rol8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
 
     if (!c) return;
 
-    SET_DFNONE();
-
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
+        LSRw(s3, s1, 6);
+        EORw_REG_LSR(s3, s3, s3, 1);
+        BFIw(xFlags, s3, F_OF, 1);
+    }
     if(c&7) {
         int rc = 8-(c&7);
         ORRw_REG_LSL(s1, s1, s1, 8);
@@ -975,7 +916,7 @@ void emit_rol8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
     IFX(X_CF) {
         BFIw(xFlags, s1, F_CF, 1);
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         EORw_REG_LSR(s3, s1, s1, 7);
         BFIw(xFlags, s3, F_OF, 1);
     }
@@ -988,8 +929,6 @@ void emit_ror8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
 
     if (!c) return;
 
-    SET_DFNONE();
-
     if(!BOX64ENV(cputype))
         IFX(X_OF) {
             EORw_REG_LSR(s3, s1, s1, 7);
@@ -1018,8 +957,12 @@ void emit_rol16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
 
     if (!c) return;
 
-    SET_DFNONE();
-
+    if(!BOX64ENV(cputype))
+        IFX(X_OF) {
+            LSRw(s3, s1, 14);
+            EORw_REG_LSR(s3, s3, s3, 1);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
     if(c&15) {
         int rc = 16-(c&15);
         ORRw_REG_LSL(s1, s1, s1, 16);
@@ -1028,10 +971,11 @@ void emit_rol16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
     IFX(X_CF) {
         BFIw(xFlags, s1, F_CF, 1);
     }
-    IFX(X_OF) {
-        EORw_REG_LSR(s3, s1, s1, 15);
-        BFIw(xFlags, s3, F_OF, 1);
-    }
+    if(BOX64ENV(cputype))
+        IFX(X_OF) {
+            EORw_REG_LSR(s3, s1, s1, 15);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
 }
 
 // emit ROR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
@@ -1041,8 +985,6 @@ void emit_ror16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
 
     if (!c) return;
 
-    SET_DFNONE();
-
     if(!BOX64ENV(cputype))
         IFX(X_OF) {
             EORw_REG_LSR(s3, s1, s1, 15);
@@ -1068,27 +1010,26 @@ void emit_rcl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
 {
     MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
 
-    SET_DFNONE();
     if (!(c%9)) return;
 
     c%=9;
     BFIw(s1, xFlags, 8, 1); // insert cf
     IFX(X_OF|X_CF) {
-        if(c!=8) {
-            LSRw_IMM(s3, s1, 8-c);
-        } else {
-            MOVw_REG(s3, s1);
-        }
+        BFXILw(xFlags, s1, 8-c, 1);
     }
+    if(!BOX64ENV(cputype))
+        IFX(X_OF) {
+            LSRw(s3, s1, 6);
+            EORw_REG_LSR(s3, s3, s3, 1);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
     ORRw_REG_LSL(s1, s1, s1, 9);    // insert s1 again
     LSRw_IMM(s1, s1, 9-c); // do the rcl
-    IFX(X_CF) {
-        BFIw(xFlags, s3, F_CF, 1);
-    }
-    IFX(X_OF) {
-        EORw_REG_LSR(s3, s3, s1, 7);
-        BFIw(xFlags, s3, F_OF, 1);
-    }
+    if(BOX64ENV(cputype))
+        IFX(X_OF) {
+            EORw_REG_LSR(s3, xFlags, s1, 7);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
 }
 
 // emit RCR8 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
@@ -1096,7 +1037,6 @@ void emit_rcr8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
 {
     MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
 
-    SET_DFNONE();
     if (!(c%9)) return;
 
     c%=9;
@@ -1119,27 +1059,26 @@ void emit_rcl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
 {
     MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
 
-    SET_DFNONE();
     if (!(c%17)) return;
 
     c%=17;
     BFIw(s1, xFlags, 16, 1); // insert cf
     IFX(X_OF|X_CF) {
-        if(c!=16) {
-            LSRw_IMM(s3, s1, 16-c);
-        } else {
-            MOVw_REG(s3, s1);
-        }
+        BFXILw(xFlags, s1, 16-c, 1);
     }
+    if(!BOX64ENV(cputype))
+        IFX(X_OF) {
+            LSRw(s3, s1, 14);
+            EORw_REG_LSR(s3, s3, s3, 1);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
     ORRx_REG_LSL(s1, s1, s1, 17);    // insert s1 again
     LSRx_IMM(s1, s1, 17-c); // do the rcl
-    IFX(X_CF) {
-        BFIw(xFlags, s3, F_CF, 1);
-    }
-    IFX(X_OF) {
-        EORw_REG_LSR(s3, s3, s1, 15);
-        BFIw(xFlags, s3, F_OF, 1);
-    }
+    if(BOX64ENV(cputype))
+        IFX(X_OF) {
+            EORw_REG_LSR(s3, xFlags, s1, 15);
+            BFIw(xFlags, s3, F_OF, 1);
+        }
 }
 
 // emit RCR16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
@@ -1147,7 +1086,6 @@ void emit_rcr16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
 {
     MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
 
-    SET_DFNONE();
     if (!(c%17)) return;
 
     c%=17;
@@ -1172,8 +1110,6 @@ void emit_rcl32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
 
     if(!c) return;
 
-    SET_DFNONE();
-
     IFX(X_OF|X_CF) {
         LSRxw_IMM(s3, s1, (rex.w?64:32)-c);
     }
@@ -1200,8 +1136,6 @@ void emit_rcr32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, i
 
     if(!c) return;
 
-    SET_DFNONE();
-
     IFX(X_OF) {
         EORxw_REG_LSR(s3, xFlags, s1, rex.w?63:31);
         BFIw(xFlags, s3, F_OF, 1);
@@ -1236,14 +1170,18 @@ void emit_shrd32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint
     IFX(X_CF) {
         BFXILxw(xFlags, s1, c-1, 1);    // set CF
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
+        EORx_REG_LSR(s3, s2, s1, rex.w?63:31);   // OF is set if sign changed
+        BFIw(xFlags, s3, F_OF, 1);
+    }
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         LSRxw(s4, s1, rex.w?63:31);
     }
     EXTRxw(s1, s2, s1, c);
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         EORx_REG_LSR(s3, s4, s1, rex.w?63:31);   // OF is set if sign changed
         BFIw(xFlags, s3, F_OF, 1);
     }
@@ -1282,19 +1220,22 @@ void emit_shld32c(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, uint
     IFX(X_CF) {
         BFXILx(xFlags, s1, (rex.w?64:32)-c, 1);
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         LSRxw(s4, s1, rex.w?63:31);
     }
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
+        LSRxw(s3, s1, rex.w?62:30);
+        EORx_REG_LSR(s3, s3, s3, 1);   // OF is set if sign changed
+        BFIw(xFlags, s3, F_OF, 1);
+    }
     EXTRxw(s1, s1, s2, (rex.w?64:32)-c);
 
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_OF) {
-        if((c==1) || (BOX64DRENV(dynarec_safeflags)>1)) {
-            EORx_REG_LSR(s3, s4, s1, rex.w?63:31);   // OF is set if sign changed
-            BFIw(xFlags, s3, F_OF, 1);
-        }
+    IFX2(X_OF, && BOX64ENV(cputype)) {
+        EORx_REG_LSR(s3, s4, s1, rex.w?63:31);   // OF is set if sign changed
+        BFIw(xFlags, s3, F_OF, 1);
     }
     int need_tst = 0;
     IFX(X_ZF) need_tst = 1;
@@ -1334,7 +1275,11 @@ void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         LSRxw_REG(s3, s1, s3);
         BFIw(xFlags, s3, F_CF, 1);
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
+        EORx_REG_LSR(s3, s2, s1, rex.w?63:31);   // OF is set if sign changed
+        BFIw(xFlags, s3, F_OF, 1);
+    }
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         LSRxw(s4, s1, rex.w?63:31);
     }
     if(s1==s2) {
@@ -1349,7 +1294,7 @@ void emit_shrd32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         EORxw_REG_LSR(s3, s4, s1, rex.w?63:31);   // OF is set if sign changed
         BFIw(xFlags, s3, F_OF, 1);
     }
@@ -1390,9 +1335,14 @@ void emit_shld32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         LSRxw_REG(s4, s1, s3);
         BFIxw(xFlags, s4, F_CF, 1);
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         LSRxw(s4, s1, rex.w?63:31);
     }
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
+        LSRxw(s4, s1, rex.w?62:30);
+        EORx_REG_LSR(s4, s4, s4, 1);   // OF is set if sign changed
+        BFIw(xFlags, s4, F_OF, 1);
+    }
     if(s1==s2) {
         RORxw_REG(s1, s1, s3);
     } else {
@@ -1404,7 +1354,7 @@ void emit_shld32(dynarec_arm_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     IFX(X_PEND) {
         STRxw_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         EORx_REG_LSR(s3, s4, s1, rex.w?63:31);   // OF is set if sign changed
         BFIw(xFlags, s3, F_OF, 1);
     }
@@ -1442,28 +1392,26 @@ void emit_shrd16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int
     } else {
         SET_DFNONE();
     }
-    if(!c) {
-        IFX(X_PEND) {
-            STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
-        }
-        return;
-    }
     ORRw_REG_LSL(s1, s1, s2, 16);   // create concat first
     IFX(X_CF) {
         BFXILw(xFlags, s1, c-1, 1);    // set CF
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         LSRw(s4, s1, 15);
     }
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
+        EORw_REG_LSR(s4, s2, s1, 15);
+        BFIw(xFlags, s4, F_OF, 1);
+    }
     RORw(s1, s1, c);
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    COMP_ZFSF(s1, 16)
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         EORx_REG_LSR(s3, s4, s1, 15);   // OF is set if sign changed
         BFIw(xFlags, s3, F_OF, 1);
     }
+    COMP_ZFSF(s1, 16)
     IFX(X_AF) {BFCw(xFlags, F_AF, 1);}
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s4);
@@ -1487,14 +1435,18 @@ void emit_shrd16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3,
         LSRw_REG(s3, s1, s3);
         BFIw(xFlags, s3, F_CF, 1);
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         LSRw(s4, s1, 15);
     }
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
+        EORw_REG_LSR(s4, s2, s1, 15);
+        BFIw(xFlags, s4, F_OF, 1);
+    }
     RORw_REG(s1, s1, s5);
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         EORw_REG_LSR(s3, s4, s1, 15);   // OF is set if sign changed
         BFIw(xFlags, s3, F_OF, 1);
     }
@@ -1517,16 +1469,21 @@ void emit_shld16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int
         SET_DFNONE();
     }
     BFIw(s1, s2, 16, 16);   // create concat first
-    IFX2(X_CF, && c) {
-        if(c<16)
+    IFX(X_CF) {
+        if(c<=16)
             LSRw(s3, s1, 16-c);
         else
             LSRw(s3, s2, 32-c);
         BFIw(xFlags, s3, F_CF, 1);
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         LSRw(s4, s1, 15);
     }
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
+        LSRw(s4, s1, 14);
+        EORw_REG_LSR(s4, s4, s4, 1);   // OF is set if sign changed
+        BFIw(xFlags, s4, F_OF, 1);
+    }
     if(c)
         RORw(s1, s1, 32-c);
 
@@ -1534,7 +1491,7 @@ void emit_shld16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
     COMP_ZFSF(s1, 16)
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         EORw_REG_LSR(s3, s4, s1, 15);   // OF is set if sign changed
         BFIw(xFlags, s3, F_OF, 1);
     }
@@ -1560,9 +1517,14 @@ void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3,
         LSRw_REG(s3, s4, s3);
         BFIw(xFlags, s3, F_CF, 1);
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         LSRw(s4, s1, 15);
     }
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
+        LSRw(s4, s1, 14);
+        EORw_REG_LSR(s4, s4, s4, 1);   // OF is set if sign changed
+        BFIw(xFlags, s4, F_OF, 1);
+    }
     BFIw(s1, s2, 16, 16);   // create concat first
     MOV32w(s3, 32);
     SUBw_REG(s3, s3, s5);
@@ -1571,9 +1533,9 @@ void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3,
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && BOX64ENV(cputype)) {
         EORw_REG_LSR(s3, s4, s1, 15);   // OF is set if sign changed
-            BFIw(xFlags, s3, F_OF, 1);
+        BFIw(xFlags, s3, F_OF, 1);
     }
     COMP_ZFSF(s1, 16)
     IFX(X_AF) {BFCw(xFlags, F_AF, 1);}