| author | Yang Liu <liuyang22@iscas.ac.cn> | 2023-03-15 15:00:26 +0800 |
|---|---|---|
| committer | GitHub <noreply@github.com> | 2023-03-15 08:00:26 +0100 |
| commit | b5f3d2565abac6e86ed520aa87a1e3e6ce98218d (patch) | |
| tree | 110be1d39bb6e26f681c702891abca3f4159b19a /src | |
| parent | 2fdf52afb83b2b09f152758e9ff84131bdaf6322 (diff) | |
[RV64_DYNAREC] Added 39 CMP opcode and some fixes (#565)
Diffstat (limited to 'src')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_00.c | 9 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_math.c | 93 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_emit_tests.c | 39 |
| -rw-r--r-- | src/dynarec/rv64/dynarec_rv64_helper.h | 33 |
4 files changed, 119 insertions, 55 deletions
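Opcode 0x39 is the 32/64-bit `CMP Ed, Gd` form: it performs the same subtraction as SUB and updates the arithmetic flags, but never writes the result back, which is why the new `emit_cmp32` in the diff below computes the difference into a scratch register (`s6`) instead of overwriting `s1`. As a rough reference for the flag semantics being emitted, here is a plain C sketch (not box64 code; the names are illustrative):

```c
/* Plain C sketch (not box64 source) of 32-bit CMP flag semantics:
 * the subtraction is performed only for its flag effects and the
 * destination operand is left untouched. */
#include <stdint.h>
#include <stdio.h>

typedef struct { int cf, zf, sf, of, af; } flags_t;

static flags_t cmp32_flags(uint32_t op1, uint32_t op2)
{
    uint32_t res = op1 - op2;   /* computed, but never written back */
    flags_t f;
    f.cf = op1 < op2;                               /* unsigned borrow      */
    f.zf = (res == 0);
    f.sf = (res >> 31) & 1;
    f.af = ((op1 ^ op2 ^ res) >> 4) & 1;            /* borrow out of bit 3  */
    f.of = (((op1 ^ op2) & (op1 ^ res)) >> 31) & 1; /* signed overflow      */
    return f;
}

int main(void)
{
    flags_t f = cmp32_flags(1, 2);  /* 1 - 2: borrow and negative result */
    printf("CF=%d ZF=%d SF=%d OF=%d AF=%d\n", f.cf, f.zf, f.sf, f.of, f.af);
    return 0;
}
```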
```diff
diff --git a/src/dynarec/rv64/dynarec_rv64_00.c b/src/dynarec/rv64/dynarec_rv64_00.c
index 48e2ca27..385e83b2 100644
--- a/src/dynarec/rv64/dynarec_rv64_00.c
+++ b/src/dynarec/rv64/dynarec_rv64_00.c
@@ -71,7 +71,14 @@ uintptr_t dynarec64_00(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_xor32(dyn, ninst, rex, ed, gd, x3, x4);
             WBACK;
             break;
-
+        case 0x39:
+            INST_NAME("CMP Ed, Gd");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_cmp32(dyn, ninst, rex, ed, gd, x3, x4, x5, x6);
+            break;
         case 0x50:
         case 0x51:
         case 0x52:
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_math.c b/src/dynarec/rv64/dynarec_rv64_emit_math.c
index 424b9a9c..417d0ccf 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_math.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_math.c
@@ -23,53 +23,6 @@
 #include "dynarec_rv64_functions.h"
 #include "dynarec_rv64_helper.h"
 
-#define CALC_SUB_FLAGS() \
-    IFX(X_PEND) { \
-        SDxw(s1, xEmu, offsetof(x64emu_t, res)); \
-    } \
-    \
-    IFX(X_AF | X_CF | X_OF) { \
-        /* calc borrow chain */ \
-        /* bc = (res & (~op1 | op2)) | (~op1 & op2) */ \
-        OR(s3, s5, s2); \
-        AND(s4, s1, s3); \
-        AND(s5, s5, s2); \
-        OR(s4, s4, s5); \
-        IFX(X_AF) { \
-            /* af = bc & 0x8 */ \
-            ANDI(s3, s4, 8); \
-            BEQZ(s3, 4); \
-            ORI(xFlags, xFlags, 1 << F_AF); \
-        } \
-        IFX(X_CF) { \
-            /* cf = bc & (rex.w?(1<<63):(1<<31)) */ \
-            SRLI(s3, s4, rex.w?63:31); \
-            BEQZ(s3, 4); \
-            ORI(xFlags, xFlags, 1 << F_CF); \
-        } \
-        IFX(X_OF) { \
-            /* of = ((bc >> rex.w?62:30) ^ (bc >> rex.w?63:31)) & 0x1; */ \
-            SRLI(s3, s4, rex.w?62:30); \
-            SRLI(s4, s3, 1); \
-            XOR(s3, s3, s4); \
-            ANDI(s3, s3, 1); \
-            BEQZ(s3, 4); \
-            ORI(xFlags, xFlags, 1 << F_OF); \
-        } \
-    } \
-    IFX(X_ZF) { \
-        BEQZ(s1, 4); \
-        ORI(xFlags, xFlags, 1 << F_ZF); \
-    } \
-    IFX(X_SF) { \
-        SRLI(s3, s1, rex.w?63:31); \
-        BEQZ(s3, 4); \
-        ORI(xFlags, xFlags, 1 << F_SF); \
-    } \
-    IFX(X_PF) { \
-        emit_pf(dyn, ninst, s1, s3, s4); \
-    } \
-
 // emit ADD32 instruction, from s1, constant c, store result in s1 using s3 and s4 as scratch
 void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5)
 {
@@ -152,13 +105,13 @@ void emit_add32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         BGE(s1, xZR, 4);
         ORI(xFlags, xFlags, 1 << F_SF);
     }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
     IFX(X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
     IFX(X_ZF) {
-        if (!rex.w) {
-            ZEROUP(s1);
-        }
         BNEZ(s1, 4);
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
@@ -183,7 +136,24 @@ void emit_sub32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
     }
 
     SUBxw(s1, s1, s2);
 
-    CALC_SUB_FLAGS();
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_SF) {
+        BGE(s1, xZR, 4);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    CALC_SUB_FLAGS(s5, s2, s1, s3, s4);
+    IFX(X_ZF) {
+        BEQZ(s1, 4);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
 }
 
@@ -218,7 +188,7 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
     }
 
     if (c > -2048 && c <= 2048) {
-        ADDI(s1, s1, -c);
+        ADDIxw(s1, s1, -c);
     } else {
         IFX(X_PEND) {} else {MOV64x(s2, c);}
         SUBxw(s1, s1, s2);
@@ -230,5 +200,22 @@ void emit_sub32c(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
             MOV64x(s2, c);
         }
     }
-    CALC_SUB_FLAGS();
+    IFX(X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_SF) {
+        BGE(s1, xZR, 4);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    CALC_SUB_FLAGS(s5, s2, s1, s3, s4);
+    IFX(X_ZF) {
+        BEQZ(s1, 4);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
 }
diff --git a/src/dynarec/rv64/dynarec_rv64_emit_tests.c b/src/dynarec/rv64/dynarec_rv64_emit_tests.c
index d12bb7f9..f4394800 100644
--- a/src/dynarec/rv64/dynarec_rv64_emit_tests.c
+++ b/src/dynarec/rv64/dynarec_rv64_emit_tests.c
@@ -23,6 +23,45 @@
 #include "dynarec_rv64_functions.h"
 #include "dynarec_rv64_helper.h"
 
+// emit CMP32 instruction, from cmp s1, s2, using s3 and s4 as scratch
+void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6)
+{
+    CLEAR_FLAGS();
+    IFX_PENDOR0 {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, rex.w?d_cmp64:d_cmp32);
+    } else {
+        SET_DFNONE(s4);
+    }
+
+    IFX(X_AF | X_CF | X_OF) {
+        // for later flag calculation
+        NOT(s5, s1);
+    }
+
+    // It's a cmp, we can't store the result back to s1.
+    SUBxw(s6, s1, s2);
+    IFX_PENDOR0 {
+        SDxw(s6, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX(X_SF) {
+        BGE(s6, xZR, 0);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    CALC_SUB_FLAGS(s5, s2, s6, s3, s4);
+    IFX(X_ZF) {
+        BEQZ(s6, 4);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX(X_PF) {
+        emit_pf(dyn, ninst, s6, s3, s4);
+    }
+}
+
 // emit TEST32 instruction, from test s1, s2, using s3 and s4 as scratch
 void emit_test32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
diff --git a/src/dynarec/rv64/dynarec_rv64_helper.h b/src/dynarec/rv64/dynarec_rv64_helper.h
index befeba12..b3017667 100644
--- a/src/dynarec/rv64/dynarec_rv64_helper.h
+++ b/src/dynarec/rv64/dynarec_rv64_helper.h
@@ -89,6 +89,37 @@
 
 #define CLEAR_FLAGS() IFX(X_ALL) {ANDI(xFlags, xFlags, ~((1UL<<F_AF) | (1UL<<F_CF) | (1UL<<F_OF) | (1UL<<F_ZF) | (1UL<<F_SF) | (1UL<<F_PF)));}
 
+#define CALC_SUB_FLAGS(op1_, op2, res, scratch1, scratch2) \
+    IFX(X_AF | X_CF | X_OF) { \
+        /* calc borrow chain */ \
+        /* bc = (res & (~op1 | op2)) | (~op1 & op2) */ \
+        OR(scratch1, op1_, op2); \
+        AND(scratch2, res, scratch1); \
+        AND(op1_, op1_, op2); \
+        OR(scratch2, scratch2, op1_); \
+        IFX(X_AF) { \
+            /* af = bc & 0x8 */ \
+            ANDI(scratch1, scratch2, 8); \
+            BEQZ(scratch1, 4); \
+            ORI(xFlags, xFlags, 1 << F_AF); \
+        } \
+        IFX(X_CF) { \
+            /* cf = bc & (rex.w?(1<<63):(1<<31)) */ \
+            SRLI(scratch1, scratch2, rex.w?63:31); \
+            BEQZ(scratch1, 4); \
+            ORI(xFlags, xFlags, 1 << F_CF); \
+        } \
+        IFX(X_OF) { \
+            /* of = ((bc >> rex.w?62:30) ^ (bc >> rex.w?63:31)) & 0x1; */ \
+            SRLI(scratch1, scratch2, rex.w?62:30); \
+            SRLI(scratch2, scratch1, 1); \
+            XOR(scratch1, scratch1, scratch2); \
+            ANDI(scratch1, scratch1, 1); \
+            BEQZ(scratch1, 4); \
+            ORI(xFlags, xFlags, 1 << F_OF); \
+        } \
+    }
+
 #ifndef MAYSETFLAGS
 #define MAYSETFLAGS()
 #endif
@@ -330,7 +361,7 @@ void jump_to_next(dynarec_rv64_t* dyn, uintptr_t ip, int reg, int ninst);
 //void grab_segdata(dynarec_rv64_t* dyn, uintptr_t addr, int ninst, int reg, int segment);
 //void emit_cmp8(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 //void emit_cmp16(dynarec_rv64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
-//void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
+void emit_cmp32(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6);
 //void emit_cmp8_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
 //void emit_cmp16_0(dynarec_rv64_t* dyn, int ninst, int s1, int s3, int s4);
 //void emit_cmp32_0(dynarec_rv64_t* dyn, int ninst, rex_t rex, int s1, int s3, int s4);
```
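The `CALC_SUB_FLAGS` macro moved into the helper header derives CF, AF and OF from the borrow chain `bc = (res & (~op1 | op2)) | (~op1 & op2)`, which is why `emit_cmp32` pre-computes `NOT(s5, s1)` (that is, ~op1) before the subtraction. A minimal C sketch of that derivation for the 32-bit case (illustrative only; the function and variable names are assumptions, not box64 identifiers):

```c
/* Reference sketch of the borrow-chain flag derivation used by the
 * CALC_SUB_FLAGS macro (illustrative C, not box64 source). */
#include <stdint.h>
#include <assert.h>

static void sub32_borrow_flags(uint32_t op1, uint32_t op2,
                               int *cf, int *af, int *of)
{
    uint32_t res = op1 - op2;
    uint32_t not_op1 = ~op1;                 /* what NOT(s5, s1) precomputes */
    /* bc = (res & (~op1 | op2)) | (~op1 & op2) */
    uint32_t bc = (res & (not_op1 | op2)) | (not_op1 & op2);
    *af = (bc >> 3) & 1;                     /* borrow out of bit 3  */
    *cf = (bc >> 31) & 1;                    /* borrow out of bit 31 */
    *of = ((bc >> 30) ^ (bc >> 31)) & 1;     /* borrow into vs. out of the sign bit */
}

int main(void)
{
    int cf, af, of;
    sub32_borrow_flags(0, 1, &cf, &af, &of);           /* 0 - 1 borrows everywhere */
    assert(cf == 1 && af == 1 && of == 0);
    sub32_borrow_flags(0x80000000u, 1, &cf, &af, &of); /* INT_MIN - 1 overflows */
    assert(of == 1 && cf == 0);
    return 0;
}
```

The two right shifts by 30 and 31 followed by an XOR reproduce the macro's `of = ((bc >> rex.w?62:30) ^ (bc >> rex.w?63:31)) & 0x1` check in plain C for the 32-bit case.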