about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author ptitSeb <sebastien.chev@gmail.com> 2024-11-14 16:43:01 +0100
committer ptitSeb <sebastien.chev@gmail.com> 2024-11-14 16:43:01 +0100
commit d05e719ce3caa5fd6e7de656f4d28a91bb718dca (patch)
tree a12d674361fbc094e42b40641fd4ca909f63b9be
parent 12f4afcba28931ea6f367e11abc2f27ba68ee7f9 (diff)
download box64-d05e719ce3caa5fd6e7de656f4d28a91bb718dca.tar.gz
box64-d05e719ce3caa5fd6e7de656f4d28a91bb718dca.zip
[ARM64_DYNAREC] Refactor 8/16/32/64bits CMP and REP CMPS/SCAS opcodes
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_00.c 12
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_0f.c 5
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_66.c 12
-rw-r--r-- src/dynarec/arm64/dynarec_arm64_emit_tests.c 137
4 files changed, 115 insertions, 51 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_00.c b/src/dynarec/arm64/dynarec_arm64_00.c
index a7df0d7b..f1bc34f7 100644
--- a/src/dynarec/arm64/dynarec_arm64_00.c
+++ b/src/dynarec/arm64/dynarec_arm64_00.c
@@ -1599,7 +1599,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             case 1:
             case 2:
                 if(rep==1) {INST_NAME("REPNZ CMPSB");} else {INST_NAME("REPZ CMPSB");}
-                MAYSETFLAGS();
+                if(box64_dynarec_safeflags>1)
+                    MAYSETFLAGS();
                 SMREAD();
                 SETFLAGS(X_ALL, SF_SET_PENDING);
                 CBZx_NEXT(xRCX);
@@ -1640,7 +1641,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             case 1:
             case 2:
                 if(rep==1) {INST_NAME("REPNZ CMPSD");} else {INST_NAME("REPZ CMPSD");}
-                MAYSETFLAGS();
+                if(box64_dynarec_safeflags>1)
+                    MAYSETFLAGS();
                 SETFLAGS(X_ALL, SF_SET_PENDING);
                 SMREAD();
                 CBZx_NEXT(xRCX);
@@ -1784,7 +1786,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             case 1:
             case 2:
                 if(rep==1) {INST_NAME("REPNZ SCASB");} else {INST_NAME("REPZ SCASB");}
-                MAYSETFLAGS();
+                if(box64_dynarec_safeflags>1)
+                    MAYSETFLAGS();
                 SMREAD();
                 SETFLAGS(X_ALL, SF_SET_PENDING);
                 CBZx_NEXT(xRCX);
@@ -1822,7 +1825,8 @@ uintptr_t dynarec64_00(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             case 1:
             case 2:
                 if(rep==1) {INST_NAME("REPNZ SCASD");} else {INST_NAME("REPZ SCASD");}
-                MAYSETFLAGS();
+                if(box64_dynarec_safeflags>1)
+                    MAYSETFLAGS();
                 SMREAD();
                 SETFLAGS(X_ALL, SF_SET_PENDING);
                 CBZx_NEXT(xRCX);
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 9034398c..90376126 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -1921,9 +1921,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 UFLAG_IF {emit_cmp32(dyn, ninst, rex, xRAX, ed, x3, x4, x5);}

                 MOVxw_REG(x1, ed);  // save value

                 SUBxw_REG(x4, xRAX, x1);

-                CBNZxw_MARK2(x4);

+                CBNZxw_MARK(x4);

                 MOVxw_REG(ed, gd);

-                MARK2;

             } else {

                 addr = geted(dyn, addr, ninst, nextop, &wback, x2, &fixedaddress, &unscaled, 0xfff<<(2+rex.w), (1<<(2+rex.w))-1, rex, NULL, 0, 0);

                 LDxw(x1, wback, fixedaddress);

@@ -1932,8 +1931,8 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                 CBNZxw_MARK(x4);

                 // EAX == Ed

                 STxw(gd, wback, fixedaddress);

-                MARK;

             }

+            MARK;

             MOVxw_REG(xRAX, x1);    // upper part of RAX will be erase on 32bits, no mater what

             break;

 

diff --git a/src/dynarec/arm64/dynarec_arm64_66.c b/src/dynarec/arm64/dynarec_arm64_66.c
index 86f839fb..0c6ef67b 100644
--- a/src/dynarec/arm64/dynarec_arm64_66.c
+++ b/src/dynarec/arm64/dynarec_arm64_66.c
@@ -304,10 +304,10 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
         case 0x3D:

             INST_NAME("CMP AX, Iw");

             SETFLAGS(X_ALL, SF_SET_PENDING);

-            i16 = F16;

+            u16 = F16;

             UXTHw(x1, xRAX);

-            if(i16) {

-                MOV32w(x2, i16);

+            if(u16) {

+                MOV32w(x2, u16);

                 emit_cmp16(dyn, ninst, x1, x2, x3, x4, x5);

             } else {

                 emit_cmp16_0(dyn, ninst, x1, x3, x4);

@@ -820,7 +820,8 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             case 1:

             case 2:

                 if(rep==1) {INST_NAME("REPNZ CMPSW");} else {INST_NAME("REPZ CMPSW");}

-                MAYSETFLAGS();

+                if(box64_dynarec_safeflags>1)

+                    MAYSETFLAGS();

                 SETFLAGS(X_ALL, SF_SET_PENDING);

                 CBZx_NEXT(xRCX);

                 TBNZ_MARK2(xFlags, F_DF);

@@ -921,7 +922,8 @@ uintptr_t dynarec64_66(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
             case 1:

             case 2:

                 if(rep==1) {INST_NAME("REPNZ SCASW");} else {INST_NAME("REPZ SCASW");}

-                MAYSETFLAGS();

+                if(box64_dynarec_safeflags>1)

+                    MAYSETFLAGS();

                 SETFLAGS(X_ALL, SF_SET_PENDING);

                 CBZx_NEXT(xRCX);

                 UXTHw(x1, xRAX);

diff --git a/src/dynarec/arm64/dynarec_arm64_emit_tests.c b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
index 1b5184f4..77937ab0 100644
--- a/src/dynarec/arm64/dynarec_arm64_emit_tests.c
+++ b/src/dynarec/arm64/dynarec_arm64_emit_tests.c
@@ -136,32 +136,61 @@ void emit_cmp16(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, i
     } else {
         SET_DFNONE(s3);
     }
-    SUBw_REG(s5, s1, s2);   // res = s1 - s2
-    IFX_PENDOR0 {
-        STRH_U12(s5, xEmu, offsetof(x64emu_t, res));
+    IFX(X_AF) {
+        ORNw_REG(s3, s2, s1);  // s3 = ~op1 | op2
+        BICw(s4, s2, s1);      // s4 = ~op1 & op2
     }
-    COMP_ZFSF(s5, 16)
-    // bc = (res & (~d | s)) | (~d & s)
-    IFX(X_CF|X_AF|X_OF) {
-        MVNw_REG(s4, s1);        // s4 = ~d
-        ORRw_REG(s4, s4, s2);    // s4 = ~d | s
-        ANDw_REG(s4, s4, s5);    // s4 = res & (~d | s)
-        BICw_REG(s3, s2, s1);    // s3 = s & ~d
-        ORRw_REG(s3, s4, s3);    // s3 = (res & (~d | s)) | (s & ~d)
-        IFX(X_CF) {
-            LSRw(s4, s3, 15);
-            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x8000
-        }
+    IFX(X_CF|X_ZF|X_SF|X_OF) {
+        LSLw(s5, s1, 16);
+        SUBSw_REG_LSL(s5, s5, s2, 16);
         IFX(X_AF) {
+            ANDw_REG_LSR(s3, s3, s5, 16);   // s3 = (~op1 | op2) & res
+            ORRw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
             LSRw(s4, s3, 3);
-            BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+            BFIx(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+        }
+        IFX(X_ZF) {
+            IFNATIVE(NF_EQ) {} else {
+                CSETw(s4, cEQ);
+                BFIw(xFlags, s4, F_ZF, 1);
+            }
+        }
+        IFX(X_CF) {
+            // inverted carry
+            IFNATIVE(NF_CF) {
+                GEN_INVERTED_CARRY();
+            } else {
+                CSETw(s4, cCC);
+                BFIw(xFlags, s4, F_CF, 1);
+            }
         }
         IFX(X_OF) {
-            LSRw(s4, s3, 14);
-            EORw_REG_LSR(s4, s4, s4, 1);
-            BFIw(xFlags, s4, F_OF, 1);    // OF: ((bc >> 14) ^ ((bc>>14)>>1)) & 1
+            IFNATIVE(NF_VF) {} else {
+                CSETw(s4, cVS);
+                BFIw(xFlags, s4, F_OF, 1);
+            }
+        }
+        IFX(X_SF) {
+            IFNATIVE(NF_SF) {} else {
+                CSETw(s4, cMI);
+                BFIw(xFlags, s4, F_SF, 1);
+            }
+        }
+        IFX(X_PF|X_PEND) {
+            LSRw(s5, s5, 16);
+        }
+    } else {
+        SUBw_REG(s5, s1, s2);   // res = s1 - s2
+        IFX(X_AF) {
+            ANDw_REG(s3, s3, s5);   // s3 = (~op1 | op2) & res
+            ORRw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+            LSRw(s4, s3, 3);
+            BFIx(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
         }
     }
+    IFX_PENDOR0 {
+        STRH_U12(s5, xEmu, offsetof(x64emu_t, res));
+    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s5, s4);
     }
@@ -196,34 +225,64 @@ void emit_cmp8(dynarec_arm_t* dyn, int ninst, int s1, int s2, int s3, int s4, in
     IFX_PENDOR0 {
         STRB_U12(s1, xEmu, offsetof(x64emu_t, op1));
         STRB_U12(s2, xEmu, offsetof(x64emu_t, op2));
-        SET_DF(s4, d_cmp8);
+        SET_DF(s3, d_cmp8);
     } else {
-        SET_DFNONE(s4);
+        SET_DFNONE(s3);
     }
-    SUBw_REG(s5, s1, s2);   // res = s1 - s2
-    IFX_PENDOR0 {
-        STRB_U12(s5, xEmu, offsetof(x64emu_t, res));
+    IFX(X_AF) {
+        ORNw_REG(s3, s2, s1);  // s3 = ~op1 | op2
+        BICw(s4, s2, s1);      // s4 = ~op1 & op2
     }
-    COMP_ZFSF(s5, 8)
-    // bc = (res & (~d | s)) | (~d & s)
-    IFX(X_CF|X_AF|X_OF) {
-        ORNw_REG(s4, s2, s1);   // s4 = ~d | s
-        ANDw_REG(s4, s4, s5);   // s4 = res & (~d | s)
-        BICw_REG(s3, s2, s1);   // s3 = s & ~d
-        ORRw_REG(s3, s4, s3);   // s3 = (res & (~d | s)) | (s & ~d)
-        IFX(X_CF) {
-            LSRw(s4, s3, 7);
-            BFIw(xFlags, s4, F_CF, 1);    // CF : bc & 0x80
-        }
+    IFX(X_CF|X_ZF|X_SF|X_OF) {
+        LSLw(s5, s1, 24);
+        SUBSw_REG_LSL(s5, s5, s2, 24);
         IFX(X_AF) {
+            ANDw_REG_LSR(s3, s3, s5, 24);   // s3 = (~op1 | op2) & res
+            ORRw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
             LSRw(s4, s3, 3);
-            BFIw(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+            BFIx(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+        }
+        IFX(X_ZF) {
+            IFNATIVE(NF_EQ) {} else {
+                CSETw(s4, cEQ);
+                BFIw(xFlags, s4, F_ZF, 1);
+            }
+        }
+        IFX(X_CF) {
+            // inverted carry
+            IFNATIVE(NF_CF) {
+                GEN_INVERTED_CARRY();
+            } else {
+                CSETw(s4, cCC);
+                BFIw(xFlags, s4, F_CF, 1);
+            }
         }
         IFX(X_OF) {
-            LSRw(s4, s3, 6);
-            EORw_REG_LSR(s4, s4, s4, 1);
-            BFIw(xFlags, s4, F_OF, 1);    // OF: ((bc >> 6) ^ ((bc>>6)>>1)) & 1
+            IFNATIVE(NF_VF) {} else {
+                CSETw(s4, cVS);
+                BFIw(xFlags, s4, F_OF, 1);
+            }
         }
+        IFX(X_SF) {
+            IFNATIVE(NF_SF) {} else {
+                CSETw(s4, cMI);
+                BFIw(xFlags, s4, F_SF, 1);
+            }
+        }
+        IFX(X_PF|X_PEND) {
+            LSRw(s5, s5, 24);
+        }
+    } else {
+        SUBw_REG(s5, s1, s2);   // res = s1 - s2
+        IFX(X_AF) {
+            ANDw_REG(s3, s3, s5);   // s3 = (~op1 | op2) & res
+            ORRw_REG(s3, s3, s4);   // s3 = (~op1 & op2) | ((~op1 | op2) & res)
+            LSRw(s4, s3, 3);
+            BFIx(xFlags, s4, F_AF, 1);    // AF: bc & 0x08
+        }
+    }
+    IFX_PENDOR0 {
+        STRB_U12(s5, xEmu, offsetof(x64emu_t, res));
     }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s5, s4);