about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
authorptitSeb <sebastien.chev@gmail.com>2025-02-20 13:47:04 +0100
committerptitSeb <sebastien.chev@gmail.com>2025-02-20 13:47:04 +0100
commitc2cb333261e31caedc2cf3ca2a13405c660c30b1 (patch)
tree6b47bb5c70659842f05a941b5a376f08989576a5 /src
parentb7e4cc4f50a8aac75ba9ea2faddcb29e1394612c (diff)
downloadbox64-c2cb333261e31caedc2cf3ca2a13405c660c30b1.tar.gz
box64-c2cb333261e31caedc2cf3ca2a13405c660c30b1.zip
[ARM64_DYNAREC] More work on UD flags
Diffstat (limited to 'src')
-rw-r--r--src/dynarec/arm64/dynarec_arm64_00.c25
-rw-r--r--src/dynarec/arm64/dynarec_arm64_emit_shift.c166
2 files changed, 122 insertions, 69 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index 7785b84e..dcf2037a 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -2885,14 +2885,15 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
                     ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
+                    CBZw_NEXT(x2);
                     // get CL % 9
                     MOV32w(x3, 0x1c72); // 0x10000 / 9 + 1 (this is precise enough in the 0..31 range)
                     MULw(x3, x3, x2);
                     LSRw(x3, x3, 16);   // x3 = CL / 9
                     MOV32w(x4, 9);
                     MSUBw(x2, x3, x4, x2);  // CL mod 9
-                    CBZw_NEXT(x2);
                     GETEB(x1, 0);
+                    CBZw_MARK(x2);
                     IFX2(X_OF, && !BOX64ENV(cputype)) {
                         LSRw(x5, ed, 6);
                         EORw_REG_LSR(x5, x5, x5, 1);
@@ -2907,13 +2908,16 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     LSRw_REG(ed, ed, x2);
                     EBBACK;
+                    u8 = X_CF;
+                    if(BOX64ENV(cputype)) u8 |= X_OF;
+                    IFX(u8) {
+                        BFXILw(xFlags, x5, 0, 1);
+                    }
+                    MARK;
                     IFX2(X_OF, && BOX64ENV(cputype)) {
-                        EORw_REG_LSR(x2, x5, ed, 7);
+                        EORw_REG_LSR(x2, xFlags, ed, 7);
                         BFIw(xFlags, x2, F_OF, 1);
                     }
-                    IFX(X_CF) {
-                        BFXILw(xFlags, x5, 0, 1);
-                    }
                     break;
                 case 3:
                     INST_NAME("RCR Eb, CL");
@@ -2924,17 +2928,18 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     SETFLAGS(X_OF|X_CF, SF_SUBSET);
                     ANDw_mask(x2, xRCX, 0, 0b00100);  //mask=0x00000001f
+                    CBZw_NEXT(x2);
                     // get CL % 9
                     MOV32w(x3, 0x1c72); // 0x10000 / 9 + 1
                     MULw(x3, x3, x2);
                     LSRw(x3, x3, 16);   // x3 = CL / 9
                     MOV32w(x4, 9);
                     MSUBw(x2, x3, x4, x2);  // CL mod 9
-                    CBZw_NEXT(x2);
                     GETEB(x1, 0);
+                    CBZw_MARK(x2);
                     BFIw(ed, xFlags, 8, 1); // insert CF
                     ORRw_REG_LSL(ed, ed, ed, 9);    // insert rest of ed
-                    IFX(X_OF) {
+                    IFX2(X_OF, && !BOX64ENV(cputype)) {
                         EORw_REG_LSR(x5, xFlags, ed, 7);
                         BFIw(xFlags, x5, F_OF, 1);
                     }
@@ -2945,6 +2950,12 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     }
                     LSRw_REG(ed, ed, x2);
                     EBBACK;
+                    MARK;
+                    IFX2(X_OF, && BOX64ENV(cputype)) {
+                        LSRw(x4, ed, 6);
+                        EORw_REG_LSR(x4, x4, x4, 1);
+                        BFIw(xFlags, x4, F_OF, 1);
+                    }
                     break;
                 case 4:
                 case 6:
diff --git a/src/dynarec/arm64/dynarec_arm64_emit_shift.c b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
index f0bafd1e..adc22c68 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_shift.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_shift.c
@@ -455,7 +455,8 @@ void emit_shl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
     }
     if(BOX64ENV(cputype)) {
         IFX(X_CF|X_OF) {
-            BFXILw(xFlags, s1, 8-(c&7), 1);
+            LSRw(s3, s1, (c>8)?8:(8-c));
+            BFIw(xFlags, s3, F_CF, 1);
         }
     } else {
         IFX(X_OF) {
@@ -473,13 +474,12 @@ void emit_shl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    COMP_ZFSF(s1, 8)
     if(BOX64ENV(cputype))
         IFX(X_OF) {
-            IFX2(X_SF, && !arm64_flagm) {} else {LSRw(s3, s1, 7);}  //use COMP_ZFSF operation
-            EORw_REG(s4, s3, xFlags);  // CF is set if OF is asked
+            EORw_REG_LSR(s4, xFlags, s1, 7);  // CF is set if OF is asked
             BFIw(xFlags, s4, F_OF, 1);
         }
+    COMP_ZFSF(s1, 8)
     IFX (X_AF) {
         if(BOX64ENV(cputype))
             ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10
@@ -510,7 +510,7 @@ void emit_shr8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
         LSRw_REG(s4, s1, s4);
         BFIw(xFlags, s4, 0, 1);
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
         LSRw(s4, s1, 7);
         BFIw(xFlags, s4, F_OF, 1);
     }
@@ -518,6 +518,10 @@ void emit_shr8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    IFX2(X_OF, && BOX64ENV(cputype)) {
+        LSRw(s4, s1, 6);
+        BFIw(xFlags, s4, F_OF, 1);
+    }
     COMP_ZFSF(s1, 8)
     IFX(X_AF) {
         if(BOX64ENV(cputype))
@@ -550,7 +554,7 @@ void emit_shr8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
             BFIw(xFlags, s3, 0, 1);
         }
     }
-    IFX(X_OF) {
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
         LSRw(s4, s1, 7);
         BFIw(xFlags, s4, F_OF, 1);
     }
@@ -558,6 +562,10 @@ void emit_shr8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
     IFX(X_PEND) {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
+    IFX2(X_OF, && BOX64ENV(cputype)) {
+        LSRw(s4, s1, 6);
+        BFIw(xFlags, s4, F_OF, 1);
+    }
     COMP_ZFSF(s1, 8)
     IFX(X_AF) {
         if(BOX64ENV(cputype))
@@ -672,7 +680,7 @@ void emit_shl16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4)
             BFIw(xFlags, s4, F_CF, 1);
         }
     } else {
-        IFX(F_OF) {
+        IFX(X_OF) {
             LSRw(s4, s1, 14);
             EORw_REG_LSR(s4, s4, s4, 1);
             BFIw(xFlags, s4, F_OF, 1);
@@ -724,7 +732,7 @@ void emit_shl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
             BFIw(xFlags, s3, F_CF, 1);
         }
     } else {
-        IFX(F_OF) {
+        IFX(X_OF) {
             LSRw(s4, s1, 14);
             EORw_REG_LSR(s4, s4, s4, 1);
             BFIw(xFlags, s4, F_OF, 1);
@@ -739,12 +747,12 @@ void emit_shl16c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    COMP_ZFSF(s1, 16)
     if(BOX64ENV(cputype))
         IFX(X_OF) {
             EORw_REG_LSR(s4, xFlags, s1, 15);  // CF is set if OF is asked
             BFIw(xFlags, s4, F_OF, 1);
         }
+    COMP_ZFSF(s1, 16)
     IFX (X_AF) {
         if(BOX64ENV(cputype))
             ORRw_mask(xFlags, xFlags, 28, 0); // mask = 0x10
@@ -1076,21 +1084,34 @@ void emit_rcl8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
 {
     MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
 
-    if (!(c%9)) return;
-
     c%=9;
-    BFIw(s1, xFlags, 8, 1); // insert cf
-    IFX(X_OF|X_CF) {
-        BFXILw(xFlags, s1, 8-c, 1);
+    if (!c && !BOX64ENV(cputype)) return;
+    if(c) {
+        BFIw(s1, xFlags, 8, 1); // insert cf
+        if(BOX64ENV(cputype)) {
+            IFX(X_OF|X_CF) {
+                BFXILw(xFlags, s1, 8-c, 1);
+            }
+        } else {
+            IFX(X_CF) {
+                BFXILw(xFlags, s1, 8-c, 1);
+            }
+        }
     }
-    if(!BOX64ENV(cputype))
+    if(!BOX64ENV(cputype)) {
+        IFX(X_OF|X_CF) {
+            BFXILw(xFlags, s1, 8-c, 1);
+        }
         IFX(X_OF) {
             LSRw(s3, s1, 6);
             EORw_REG_LSR(s3, s3, s3, 1);
             BFIw(xFlags, s3, F_OF, 1);
         }
-    ORRw_REG_LSL(s1, s1, s1, 9);    // insert s1 again
-    LSRw_IMM(s1, s1, 9-c); // do the rcl
+    }
+    if(c) {
+        ORRw_REG_LSL(s1, s1, s1, 9);    // insert s1 again
+        LSRw_IMM(s1, s1, 9-c); // do the rcl
+    }
     if(BOX64ENV(cputype))
         IFX(X_OF) {
             EORw_REG_LSR(s3, xFlags, s1, 7);
@@ -1103,21 +1124,27 @@ void emit_rcr8c(dynarec_arm_t* dyn, int ninst, int s1, uint32_t c, int s3, int s
 {
     MAYUSE(s1); MAYUSE(s3); MAYUSE(s4);
 
-    if (!(c%9)) return;
-
     c%=9;
-    IFX(X_OF) {
+    if (!c && !BOX64ENV(cputype)) return;
+    IFX2(X_OF, && !BOX64ENV(cputype)) {
         EORw_REG_LSR(s3, xFlags, s1, 7);
         BFIw(xFlags, s3, F_OF, 1);
     }
-    BFIw(s1, xFlags, 8, 1); // insert cf
-    IFX(X_CF) {
-        BFXILw(xFlags, s1, c-1, 1);
+    if(c) {
+        BFIw(s1, xFlags, 8, 1); // insert cf
+        IFX(X_CF) {
+            BFXILw(xFlags, s1, c-1, 1);
+        }
+        if(c>1) {
+            ORRw_REG_LSL(s1, s1, s1, 9);    // insert s1 again
+        }
+        LSRw_IMM(s1, s1, c); // do the rcr
     }
-    if(c>1) {
-        ORRw_REG_LSL(s1, s1, s1, 9);    // insert s1 again
+    IFX2(X_OF, && BOX64ENV(cputype)) {
+        LSRw(s4, s1, 6);
+        EORw_REG_LSR(s4, s4, s4, 1);
+        BFIw(xFlags, s4, F_OF, 1);
     }
-    LSRw_IMM(s1, s1, c); // do the rcr
 }
 
 // emit RCL16 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
@@ -1515,12 +1542,12 @@ void emit_shrd16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int
     } else {
         SET_DFNONE();
     }
-    ORRw_REG_LSL(s1, s1, s2, 16);   // create concat first
-    IFX(X_CF) {
+    IFX2(X_CF, && BOX64ENV(cputype)) {
         BFXILw(xFlags, s1, c-1, 1);    // set CF
     }
-    IFX2(X_OF, && BOX64ENV(cputype)) {
-        LSRw(s4, s1, 15);
+    ORRw_REG_LSL(s1, s1, s2, 16);   // create concat first
+    IFX2(X_CF, && !BOX64ENV(cputype)) {
+        BFXILw(xFlags, s1, c-1, 1);    // set CF
     }
     IFX2(X_OF, && !BOX64ENV(cputype)) {
         EORw_REG_LSR(s4, s2, s1, 15);
@@ -1531,8 +1558,9 @@ void emit_shrd16c(dynarec_arm_t* dyn, int ninst, int s1, int s2, uint32_t c, int
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX2(X_OF, && BOX64ENV(cputype)) {
-        EORx_REG_LSR(s3, s4, s1, 15);   // OF is set if sign changed
-        BFIw(xFlags, s3, F_OF, 1);
+        LSRw(s4, s1, 14);
+        EORx_REG_LSR(s4, s4, s4, 1);   // OF is set if sign changed
+        BFIw(xFlags, s4, F_OF, 1);
     }
     COMP_ZFSF(s1, 16)
     IFX(X_AF) {
@@ -1557,26 +1585,31 @@ void emit_shrd16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3,
     } else {
         SET_DFNONE();
     }
-    ORRw_REG_LSL(s1, s1, s2, 16);   // create concat first
-    IFX(X_CF) {
+    IFX2(X_CF, && BOX64ENV(cputype)) {
         SUBw_U12(s3, s5, 1);
-        LSRw_REG(s3, s1, s3);
-        BFIw(xFlags, s3, F_CF, 1);
-    }
-    IFX2(X_OF, && BOX64ENV(cputype)) {
-        LSRw(s4, s1, 15);
+        LSRw_REG(s4, s1, s3);
+        BFIw(xFlags, s4, F_CF, 1);
     }
-    IFX2(X_OF, && !BOX64ENV(cputype)) {
-        EORw_REG_LSR(s4, s2, s1, 15);
-        BFIw(xFlags, s4, F_OF, 1);
+    ORRw_REG_LSL(s1, s1, s2, 16);   // create concat first
+    if(!BOX64ENV(cputype)) {
+        IFX(X_CF) {
+            SUBw_U12(s3, s5, 1);
+            LSRw_REG(s3, s1, s3);
+            BFIw(xFlags, s3, F_CF, 1);
+        }
+        IFX(X_OF) {
+            EORw_REG_LSR(s4, s2, s1, 15);
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     RORw_REG(s1, s1, s5);
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
     IFX2(X_OF, && BOX64ENV(cputype)) {
-        EORw_REG_LSR(s3, s4, s1, 15);   // OF is set if sign changed
-        BFIw(xFlags, s3, F_OF, 1);
+        LSRw(s4, s1, 14);
+        EORx_REG_LSR(s4, s4, s4, 1);   // OF is set if sign changed
+        BFIw(xFlags, s4, F_OF, 1);
     }
     COMP_ZFSF(s1, 16)
     IFX(X_AF) {
@@ -1655,20 +1688,33 @@ void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3,
     } else {
         SET_DFNONE();
     }
-    IFX(X_CF) {
-        ORRw_REG_LSL(s4, s2, s1, 16);
-        MOV32w(s3, 32);
-        SUBw_REG(s3, s3, s5);
-        LSRw_REG(s3, s4, s3);
-        BFIw(xFlags, s3, F_CF, 1);
-    }
-    IFX2(X_OF, && BOX64ENV(cputype)) {
-        LSRw(s4, s1, 15);
-    }
-    IFX2(X_OF, && !BOX64ENV(cputype)) {
-        LSRw(s4, s1, 14);
-        EORw_REG_LSR(s4, s4, s4, 1);   // OF is set if sign changed
-        BFIw(xFlags, s4, F_OF, 1);
+    uint8_t mask = X_CF;
+    if(BOX64ENV(cputype)) {
+        IFX(X_CF|X_OF) {
+            MOV32w(s3, 16);
+            SUBw_REG(s3, s3, s5);
+            LSRw_REG(s4, s1, s3);
+            BFIw(xFlags, s4, F_CF, 1);
+        }
+        IFX(X_OF) {
+            SUBw_U12(s3, s3, 1);
+            LSRw_REG(s3, s1, s3);
+            EORw_REG(s3, xFlags, s3);   // OF is set if sign changed
+            BFIw(xFlags, s3, F_OF, 1);
+        }
+    } else {
+        IFX(X_CF) {
+            ORRw_REG_LSL(s4, s2, s1, 16);
+            MOV32w(s3, 32);
+            SUBw_REG(s3, s3, s5);
+            LSRw_REG(s3, s4, s3);
+            BFIw(xFlags, s3, F_CF, 1);
+        }
+        IFX(X_OF) {
+            LSRw(s4, s1, 14);
+            EORw_REG_LSR(s4, s4, s4, 1);   // OF is set if sign changed
+            BFIw(xFlags, s4, F_OF, 1);
+        }
     }
     BFIw(s1, s2, 16, 16);   // create concat first
     MOV32w(s3, 32);
@@ -1678,10 +1724,6 @@ void emit_shld16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s5, int s3,
     IFX(X_PEND) {
         STRH_U12(s1, xEmu, offsetof(x64emu_t, res));
     }
-    IFX2(X_OF, && BOX64ENV(cputype)) {
-        EORw_REG_LSR(s3, s4, s1, 15);   // OF is set if sign changed
-        BFIw(xFlags, s3, F_OF, 1);
-    }
     COMP_ZFSF(s1, 16)
     IFX(X_AF) {
         if(BOX64ENV(cputype))