about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <numbksco@gmail.com> 2024-04-05 22:34:59 +0800
committer GitHub <noreply@github.com> 2024-04-05 16:34:59 +0200
commit 4d260217054f16e4efc31c9bef7974898f43252e (patch)
tree d0e09e0c686ffd0f87cdd11430902838da084f04 /src
parent 2e9b8eff59cec7a0170f9dc2f1fb6212a4f3ab85 (diff)
download box64-4d260217054f16e4efc31c9bef7974898f43252e.tar.gz
box64-4d260217054f16e4efc31c9bef7974898f43252e.zip
[LA64_DYNAREC] Added more opcodes (#1416)
* [LA64_DYNAREC] Added 0F AF IMUL opcode

* Update clang-format rules

* Added 08 OR opcode

* Added F7 /3 NEG opcode and fixed some potential bugs
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c 16
-rw-r--r-- src/dynarec/la64/dynarec_la64_0f.c 67
-rw-r--r-- src/dynarec/la64/dynarec_la64_emit_logic.c 39
-rw-r--r-- src/dynarec/la64/dynarec_la64_emit_math.c 86
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 17
-rw-r--r-- src/dynarec/la64/la64_emitter.h 72
-rw-r--r-- src/dynarec/rv64/dynarec_rv64_0f.c 1
7 files changed, 295 insertions, 3 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index fe7b042d..56765fe1 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -102,6 +102,15 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             i64 = F32S;
             emit_add32c(dyn, ninst, rex, xRAX, i64, x3, x4, x5, x6);
             break;
+        case 0x08: // byte-sized OR of Gb into Eb
+            INST_NAME("OR Eb, Gb");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8; // next instruction byte; presumably the ModRM byte selecting Eb/Gb
+            GETEB(x1, 0); // presumably loads the Eb operand into x1 - confirm against GETEB
+            GETGB(x2); // presumably loads the Gb operand into x2 - confirm against GETGB
+            emit_or8(dyn, ninst, x1, x2, x4, x5); // x1 = x1 | x2, with x4/x5 as scratch
+            EBBACK(); // presumably writes x1 back to the Eb destination - confirm against EBBACK
+            break;
         case 0x09:
             INST_NAME("OR Ed, Gd");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -1128,6 +1137,13 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                         ZEROUP(ed);
                     WBACK;
                     break;
+                case 3: // NEG: negate Ed in place
+                    INST_NAME("NEG Ed");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETED(0); // presumably makes `ed` name the register holding the Ed operand - confirm against GETED
+                    emit_neg32(dyn, ninst, rex, ed, x3, x4); // ed = -ed (width per rex.w), x3/x4 are scratch
+                    WBACK; // presumably stores ed back when Ed is a memory operand - confirm against WBACK
+                    break;
                 default:
                     DEFAULT;
             }
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index cbd5f0e1..f4160c2f 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -261,6 +261,73 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             LD_D(xRDX, xEmu, offsetof(x64emu_t, regs[_DX]));
             LD_D(xRBX, xEmu, offsetof(x64emu_t, regs[_BX]));
             break;
+        case 0xAF: // two-operand signed multiply: Gd = Gd * Ed
+            INST_NAME("IMUL Gd, Ed");
+            SETFLAGS(X_ALL, SF_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            if (box64_dynarec_test) {
+                // avoid noise during test
+                CLEAR_FLAGS(x3);
+            }
+            if (rex.w) {
+                // 64bits imul
+                UFLAG_IF {
+                    MULH_D(x3, gd, ed); // x3 = signed product[127:64], taken before gd is overwritten
+                    MUL_D(gd, gd, ed);  // gd = product[63:0]
+                    IFX (X_PEND) {
+                        UFLAG_OP1(x3);
+                        UFLAG_RES(gd);
+                        UFLAG_DF(x4, d_imul64); // x4 (as in the 32-bit path), not x3: x3 is read again by the CF/OF code below
+                    } else {
+                        SET_DFNONE();
+                    }
+                    IFX (X_CF | X_OF) {
+                        SRAI_D(x4, gd, 63); // x4 = sign fill of the low half (0 or -1)
+                        XOR(x3, x3, x4);    // non-zero when the high half is not that sign fill
+                        SNEZ(x3, x3);       // x3 = 1 when the product does not fit in 64 bits
+                        IFX (X_CF) {
+                            BSTRINS_D(xFlags, x3, F_CF, F_CF);
+                        }
+                        IFX (X_OF) {
+                            BSTRINS_D(xFlags, x3, F_OF, F_OF);
+                        }
+                    }
+                } else {
+                    MULxw(gd, gd, ed);
+                }
+            } else {
+                // 32bits imul
+                UFLAG_IF {
+                    MULW_D_W(gd, gd, ed); // signed 32x32 -> full 64-bit product; reads only bits [31:0] of each operand
+                    SRAI_D(x3, gd, 32);   // x3 = sign-extended high half, so it compares equal to the SRAI_W fill below
+                    SLLI_W(gd, gd, 0);    // gd = low half, sign-extended (same form as SEXT_W)
+                    IFX (X_PEND) {
+                        UFLAG_RES(gd);
+                        UFLAG_OP1(x3);
+                        UFLAG_DF(x4, d_imul32);
+                    } else IFX (X_CF | X_OF) {
+                        SET_DFNONE();
+                    }
+                    IFX (X_CF | X_OF) {
+                        SRAI_W(x4, gd, 31); // x4 = sign fill of the low half (0 or -1)
+                        SUB_D(x3, x3, x4);  // zero only when the high half is exactly that sign fill
+                        SNEZ(x3, x3);       // x3 = 1 when the product does not fit in 32 bits
+                        IFX (X_CF) {
+                            BSTRINS_D(xFlags, x3, F_CF, F_CF);
+                        }
+                        IFX (X_OF) {
+                            BSTRINS_D(xFlags, x3, F_OF, F_OF);
+                        }
+                    }
+                } else {
+                    MULxw(gd, gd, ed);
+                }
+                SLLI_D(gd, gd, 32); // shift up then down: clears bits [63:32] of the 32-bit result
+                SRLI_D(gd, gd, 32);
+            }
+            break;
         case 0xB6:
             INST_NAME("MOVZX Gd, Eb");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_emit_logic.c b/src/dynarec/la64/dynarec_la64_emit_logic.c
index 2aa4315d..34315330 100644
--- a/src/dynarec/la64/dynarec_la64_emit_logic.c
+++ b/src/dynarec/la64/dynarec_la64_emit_logic.c
@@ -245,7 +245,7 @@ void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3
         SET_DFNONE();
     }
 
-    IFXA(X_ALL, la64_lbt) {
+    IFXA (X_ALL, la64_lbt) {
         if (rex.w)
             X64_OR_D(s1, s2);
         else
@@ -326,3 +326,40 @@ void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, in
         emit_pf(dyn, ninst, s1, s3, s4);
     }
 }
+
+
+// emit OR8 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch, s4 can be same as s2 (and so s2 destroyed)
+void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
+{
+    IFX (X_PEND) {
+        SET_DF(s3, d_or8); // presumably records d_or8 as the pending flag operation, with s3 as the macro's register
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    IFXA (X_ALL, la64_lbt) {
+        X64_OR_B(s1, s2); // LBT path: issued on the operands before s1 is overwritten by the OR below
+    }
+
+    OR(s1, s1, s2); // the result itself is always produced
+
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, res)); // store the result byte into the emu `res` field
+    }
+
+    if (la64_lbt) return; // none of the manual flag code below is emitted when la64_lbt is set
+
+    CLEAR_FLAGS(s3);
+    IFX (X_SF) {
+        SRLI_D(s3, s1, 7); // s3 = s1 >> 7; this is bit 7 alone only if s1 has nothing above bit 7 - TODO confirm callers guarantee it
+        BEQZ(s3, 8); // presumably skips just the ORI below when s3 is zero
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8); // presumably skips just the ORI below when the result is non-zero
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4); // parity is delegated to emit_pf, given s3 and s4 as its trailing register arguments
+    }
+}
\ No newline at end of file
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index 600c8c49..f47eb082 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -39,7 +39,10 @@ void emit_add32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 
     if (la64_lbt) {
         IFX(X_ALL) {
-            X64_ADD_WU(s1, s2);
+            if (rex.w)
+                X64_ADD_DU(s1, s2);
+            else
+                X64_ADD_WU(s1, s2);
         }
         ADDxw(s1, s1, s2);
         if (!rex.w) ZEROUP(s1);
@@ -465,7 +468,10 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 
     if (la64_lbt) {
         IFX(X_ALL) {
-            X64_SUB_WU(s1, s2);
+            if (rex.w)
+                X64_SUB_DU(s1, s2);
+            else
+                X64_SUB_WU(s1, s2);
         }
         SUBxw(s1, s1, s2);
         if (!rex.w) ZEROUP(s1);
@@ -577,3 +583,79 @@ void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
         emit_pf(dyn, ninst, s1, s3, s4);
     }
 }
+
+
+// emit NEG32 instruction, from s1, store result in s1 using s2 and s3 as scratch
+void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3)
+{
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1)); // save the operand before it is negated
+        SET_DF(s3, rex.w ? d_neg64 : d_neg32);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (!la64_lbt) {
+        IFX (X_AF | X_OF) {
+            MV(s3, s1); // s3 = op1
+        }
+    }
+
+    IFXA (X_ALL, la64_lbt) {
+        if (rex.w)
+            X64_SUB_DU(xZR, s1); // LBT path: issued as xZR - s1 while s1 still holds the operand
+        else
+            X64_SUB_WU(xZR, s1);
+    }
+
+    NEGxw(s1, s1);
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    if (la64_lbt) {
+        if (!rex.w) {
+            ZEROUP(s1);
+        }
+        return; // none of the manual flag code below is emitted when la64_lbt is set
+    }
+
+    CLEAR_FLAGS(s2); // s2, not s3: s3 may hold op1 (MV above) and is read by the OR below, so it must not be used as a temporary here
+    IFX (X_CF) {
+        BEQZ(s1, 8); // CF stays clear only when the result is zero
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+
+    IFX (X_AF | X_OF) {
+        OR(s3, s1, s3); // s3 = res | op1
+        IFX (X_AF) {
+            /* af = bc & 0x8 */
+            ANDI(s2, s3, 8);
+            BEQZ(s2, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX (X_OF) {
+            /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
+            SRLI_D(s2, s3, (rex.w ? 64 : 32) - 2);
+            SRLI_D(s3, s2, 1);
+            XOR(s2, s2, s3);
+            ANDI(s2, s2, 1); // keep bit 0 only; higher bits of the shifted values are discarded
+            BEQZ(s2, 8);
+            ORI(xFlags, xFlags, 1 << F_OF);
+        }
+    }
+    IFX (X_SF) {
+        BGE(s1, xZR, 8); // signed test, done before ZEROUP below clears the upper bits of a 32-bit result
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s2); // s3/s2 are free again here and are passed as emit_pf's trailing register arguments
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+}
\ No newline at end of file
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 3405afcb..d3acc05f 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -441,6 +441,19 @@
 #ifndef SET_HASCALLRET
 #define SET_HASCALLRET()
 #endif
+#define UFLAG_OP1(A) \
+    if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, op1)); } // store A into the emu op1 field, only when this instruction has gen_flags set
+#define UFLAG_OP2(A) \
+    if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, op2)); } // same as UFLAG_OP1, for the op2 field
+#define UFLAG_OP12(A1, A2)                       \
+    if (dyn->insts[ninst].x64.gen_flags) {       \
+        SDxw(A1, xEmu, offsetof(x64emu_t, op1)); \
+        SDxw(A2, xEmu, offsetof(x64emu_t, op2)); \
+    } // stores A1 to op1 and A2 to op2 under a single gen_flags check
+#define UFLAG_RES(A) \
+    if (dyn->insts[ninst].x64.gen_flags) { SDxw(A, xEmu, offsetof(x64emu_t, res)); } // store A into the emu res field, only when gen_flags is set
+#define UFLAG_DF(r, A) \
+    if (dyn->insts[ninst].x64.gen_flags) { SET_DF(r, A) } // SET_DF(r, A) only when gen_flags is set; r is handed to SET_DF, so presumably must not hold a live value - confirm
 #define UFLAG_IF if (dyn->insts[ninst].x64.gen_flags)
 #ifndef DEFAULT
 #define DEFAULT \
@@ -542,8 +555,10 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_sub32c         STEPNAME(emit_sub32c)
 #define emit_sub8           STEPNAME(emit_sub8)
 #define emit_sub8c          STEPNAME(emit_sub8c)
+#define emit_neg32          STEPNAME(emit_neg32)
 #define emit_or32           STEPNAME(emit_or32)
 #define emit_or32c          STEPNAME(emit_or32c)
+#define emit_or8            STEPNAME(emit_or8)
 #define emit_xor32          STEPNAME(emit_xor32)
 #define emit_and8           STEPNAME(emit_and8)
 #define emit_and8c          STEPNAME(emit_and8c)
@@ -601,8 +616,10 @@ void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
 void emit_sub8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_sub8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4, int s5);
+void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
 void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
+void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 9e584062..291b28da 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -309,6 +309,76 @@ f24-f31  fs0-fs7   Static registers                Callee
 
 #define SEXT_W(rd, rs1) SLLI_W(rd, rs1, 0)
 
+// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0])
+// GR[rd] = SignExtend(product[31:0], GRLEN)
+#define MUL_W(rd, rj, rk) EMIT(type_3R(0b00000000000111000, rk, rj, rd))
+
+// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0])
+// GR[rd] = SignExtend(product[63:32], GRLEN)
+#define MULH_W(rd, rj, rk) EMIT(type_3R(0b00000000000111001, rk, rj, rd))
+
+// product = unsigned(GR[rj][31:0]) * unsigned(GR[rk][31:0])
+// GR[rd] = SignExtend(product[63:32], GRLEN)
+#define MULH_WU(rd, rj, rk) EMIT(type_3R(0b00000000000111010, rk, rj, rd))
+
+// product = signed(GR[rj][63:0]) * signed(GR[rk][63:0])
+// GR[rd] = product[63:0]
+#define MUL_D(rd, rj, rk) EMIT(type_3R(0b00000000000111011, rk, rj, rd))
+
+// product = signed(GR[rj][63:0]) * signed(GR[rk][63:0])
+// GR[rd] = product[127:64]
+#define MULH_D(rd, rj, rk) EMIT(type_3R(0b00000000000111100, rk, rj, rd))
+
+// product = unsigned(GR[rj][63:0]) * unsigned(GR[rk][63:0])
+// GR[rd] = product[127:64]
+#define MULH_DU(rd, rj, rk) EMIT(type_3R(0b00000000000111101, rk, rj, rd))
+
+// product = signed(GR[rj][31:0]) * signed(GR[rk][31:0])
+// GR[rd] = product[63:0]
+#define MULW_D_W(rd, rj, rk) EMIT(type_3R(0b00000000000111110, rk, rj, rd))
+
+// product = unsigned(GR[rj][31:0]) * unsigned(GR[rk][31:0])
+// GR[rd] = product[63:0]
+#define MULW_D_WU(rd, rj, rk) EMIT(type_3R(0b00000000000111111, rk, rj, rd))
+
+// quotient = signed(GR[rj][31:0]) / signed(GR[rk][31:0])
+// GR[rd] = SignExtend(quotient[31:0], GRLEN)
+#define DIV_W(rd, rj, rk) EMIT(type_3R(0b00000000001000000, rk, rj, rd))
+
+// quotient = unsigned(GR[rj][31:0]) / unsigned(GR[rk][31:0])
+// GR[rd] = SignExtend(quotient[31:0], GRLEN)
+#define DIV_WU(rd, rj, rk) EMIT(type_3R(0b00000000001000010, rk, rj, rd))
+
+// remainder = signed(GR[rj][31:0]) % signed(GR[rk][31:0])
+// GR[rd] = SignExtend(remainder[31:0], GRLEN)
+#define MOD_W(rd, rj, rk) EMIT(type_3R(0b00000000001000001, rk, rj, rd))
+
+// remainder = unsigned(GR[rj][31:0]) % unsigned(GR[rk][31:0])
+// GR[rd] = SignExtend(remainder[31:0], GRLEN)
+#define MOD_WU(rd, rj, rk) EMIT(type_3R(0b00000000001000011, rk, rj, rd))
+
+// GR[rd] = signed(GR[rj][63:0]) / signed(GR[rk][63:0])
+#define DIV_D(rd, rj, rk) EMIT(type_3R(0b00000000001000100, rk, rj, rd))
+
+// GR[rd] = unsigned(GR[rj][63:0]) / unsigned(GR[rk][63:0])
+#define DIV_DU(rd, rj, rk) EMIT(type_3R(0b00000000001000110, rk, rj, rd))
+
+// GR[rd] = signed(GR[rj][63:0]) % signed(GR[rk][63:0])
+#define MOD_D(rd, rj, rk) EMIT(type_3R(0b00000000001000101, rk, rj, rd))
+
+// GR[rd] = unsigned(GR[rj][63:0]) % unsigned(GR[rk][63:0])
+#define MOD_DU(rd, rj, rk) EMIT(type_3R(0b00000000001000111, rk, rj, rd))
+
+#define MULxw(rd, rj, rk)      \
+    do {                       \
+        if (rex.w) {           \
+            MUL_D(rd, rj, rk); \
+        } else {               \
+            MUL_W(rd, rj, rk); \
+        }                      \
+    } while (0) // width-generic multiply: MUL_D when rex.w is set, else MUL_W (low 32 bits, sign-extended); needs `rex` in scope
+
+
 // bstr32[31:msbw+1] = GR[rd][31: msbw+1]
 // bstr32[msbw:lsbw] = GR[rj][msbw-lsbw:0]
 // bstr32[lsbw-1:0] = GR[rd][lsbw-1:0]
@@ -1709,6 +1779,8 @@ LSX instruction starts with V, LASX instruction starts with XV.
             SUB_W(rd, rj, rk); \
     } while (0)
 
+#define NEGxw(rd, rs1) SUBxw(rd, xZR, rs1) // rd = xZR - rs1 via SUBxw, i.e. -rs1 assuming xZR reads as zero
+
 #define SUBz(rd, rj, rk)       \
     do {                       \
         if (rex.is32bits)      \
diff --git a/src/dynarec/rv64/dynarec_rv64_0f.c b/src/dynarec/rv64/dynarec_rv64_0f.c
index e7191fc9..a7c54e58 100644
--- a/src/dynarec/rv64/dynarec_rv64_0f.c
+++ b/src/dynarec/rv64/dynarec_rv64_0f.c
@@ -1445,6 +1445,7 @@ uintptr_t dynarec64_0F(dynarec_rv64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             }
             break;
         case 0xAF:
+            // TODO: Refine this
             INST_NAME("IMUL Gd, Ed");
             SETFLAGS(X_ALL, SF_PENDING);
             nextop = F8;