about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Yang Liu <numbksco@gmail.com> 2024-04-06 20:45:22 +0800
committer GitHub <noreply@github.com> 2024-04-06 14:45:22 +0200
commit b96139274fcb83be3e9085a1a06084364c938bc5 (patch)
tree 12df4a027bcc17a1f5ffab665eb67263c32d5655 /src
parent d84faf57ab384344017f57c1b1d261352a320bab (diff)
download box64-b96139274fcb83be3e9085a1a06084364c938bc5.tar.gz
box64-b96139274fcb83be3e9085a1a06084364c938bc5.zip
[LA64_DYNAREC] Added more opcodes (#1425)
* Added 0B OR opcode

* Added D3 /7 SAR opcode

* Added D3 /5 SHR opcode

* Added 80 /1 OR opcode

* Added 66 0F BE MOVSX opcode

* Fixed SRAxw

* Fix

* Added 0F C8..CF BSWAP opcode

* Added more opcodes
Diffstat (limited to 'src')
-rw-r--r-- src/dynarec/la64/dynarec_la64_00.c 56
-rw-r--r-- src/dynarec/la64/dynarec_la64_0f.c 12
-rw-r--r-- src/dynarec/la64/dynarec_la64_660f.c 27
-rw-r--r-- src/dynarec/la64/dynarec_la64_emit_logic.c 49
-rw-r--r-- src/dynarec/la64/dynarec_la64_emit_shift.c 68
-rw-r--r-- src/dynarec/la64/dynarec_la64_helper.h 14
-rw-r--r-- src/dynarec/la64/la64_emitter.h 56
7 files changed, 272 insertions, 10 deletions
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 2100ae73..c32d1bc3 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -120,6 +120,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_or32(dyn, ninst, rex, ed, gd, x3, x4);
             WBACK;
             break;
+        case 0x0B:
+            INST_NAME("OR Gd, Ed");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGD;
+            GETED(0);
+            emit_or32(dyn, ninst, rex, gd, ed, x3, x4);
+            break;
         case 0x0D:
             INST_NAME("OR EAX, Id");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -520,6 +528,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         case 0x80:
             nextop = F8;
             switch((nextop>>3)&7) {
+                case 1: // OR
+                    INST_NAME("OR Eb, Ib");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    emit_or8c(dyn, ninst, x1, u8, x2, x4, x5);
+                    EBBACK();
+                    break;
                 case 4: // AND
                     INST_NAME("AND Eb, Ib");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -536,6 +552,14 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_sub8c(dyn, ninst, x1, u8, x2, x4, x5, x6);
                     EBBACK();
                     break;
+                case 6: // XOR
+                    INST_NAME("XOR Eb, Ib");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEB(x1, 1);
+                    u8 = F8;
+                    emit_xor8c(dyn, ninst, x1, u8, x2, x4);
+                    EBBACK();
+                    break;
                 case 7: // CMP
                     INST_NAME("CMP Eb, Ib");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -785,6 +809,15 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 ZEROUP(xRAX);
             }
             break;
+        case 0x99:
+            INST_NAME("CDQ");
+            if (rex.w) {
+                SRAI_D(xRDX, xRAX, 63);
+            } else {
+                SRAI_W(xRDX, xRAX, 31);
+                BSTRPICK_D(xRDX, xRDX, 31, 0);
+            }
+            break;
         case 0xA0:
             INST_NAME("MOV AL,Ob");
             if(rex.is32bits) u64 = F32; else u64 = F64;
@@ -1239,6 +1272,29 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_shl32(dyn, ninst, rex, ed, x3, x5, x4, x6);
                     WBACK;
                     break;
+                case 5:
+                    INST_NAME("SHR Ed, CL");
+                    SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                    ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f);
+                    GETED(0);
+                    if (!rex.w && MODREG) { ZEROUP(ed); }
+                    CBZ_NEXT(x3);
+                    emit_shr32(dyn, ninst, rex, ed, x3, x5, x4);
+                    WBACK;
+                    break;
+                case 7:
+                    INST_NAME("SAR Ed, CL");
+                    SETFLAGS(X_ALL, SF_PENDING);
+                    ANDI(x3, xRCX, rex.w ? 0x3f : 0x1f);
+                    GETED(0);
+                    if (!rex.w && MODREG) { ZEROUP(ed); }
+                    CBZ_NEXT(x3);
+                    UFLAG_OP12(ed, x3);
+                    SRAxw(ed, ed, x3);
+                    WBACK;
+                    UFLAG_RES(ed);
+                    UFLAG_DF(x3, rex.w ? d_sar64 : d_sar32);
+                    break;
                 default:
                     DEFAULT;
             }
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index c6879562..6bd7c3d4 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -411,6 +411,18 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             }
             if (!rex.w) ZEROUP(gd);
             break;
+        case 0xC8:
+        case 0xC9:
+        case 0xCA:
+        case 0xCB:
+        case 0xCC:
+        case 0xCD:
+        case 0xCE:
+        case 0xCF:
+            INST_NAME("BSWAP Reg");
+            gd = TO_LA64((opcode & 7) + (rex.b << 3));
+            REVBxw(gd, gd);
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 8a8a06fa..cdafe2e1 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -106,6 +106,33 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VLD(v0, ed, fixedaddress);
             }
             break;
+        case 0xBE:
+            INST_NAME("MOVSX Gw, Eb");
+            nextop = F8;
+            GETGD;
+            if (MODREG) {
+                if (rex.rex) {
+                    ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                    eb1 = ed;
+                    eb2 = 0;
+                } else {
+                    ed = (nextop & 7);
+                    eb1 = TO_LA64(ed & 3); // Ax, Cx, Dx or Bx
+                    eb2 = (ed & 4) >> 2;   // L or H
+                }
+                if (eb2) {
+                    SRLI_D(x1, eb1, eb2 * 8);
+                    EXT_W_B(x1, x1);
+                } else {
+                    EXT_W_B(x1, eb1);
+                }
+            } else {
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x4, &fixedaddress, rex, NULL, 1, 0);
+                LD_B(x1, ed, fixedaddress);
+            }
+            BSTRINS_D(gd, x1, 15, 0);
+            break;
         case 0xEF:
             INST_NAME("PXOR Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_emit_logic.c b/src/dynarec/la64/dynarec_la64_emit_logic.c
index 9f4ce0e1..b8f3e8be 100644
--- a/src/dynarec/la64/dynarec_la64_emit_logic.c
+++ b/src/dynarec/la64/dynarec_la64_emit_logic.c
@@ -22,6 +22,46 @@
 #include "dynarec_la64_helper.h"
 
 
+// emit XOR8 instruction: s1 = s1 ^ (c & 0xff), result stays in s1; s3 and s4 are scratch registers
+void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4)
+{
+    IFX (X_PEND) {
+        SET_DF(s4, d_xor8); // presumably records d_xor8 as the deferred-flags kind, with s4 as scratch -- confirm against SET_DF
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) { // alternate code path selected by la64_lbt (presumably: LBT extension available)
+        IFX (X_ALL) {
+            ADDI_D(s3, xZR, c & 0xff); // s3 = low byte of the constant
+            X64_XOR_B(s1, s3);         // presumably derives the x86 flags for the byte XOR -- confirm against the emitter
+        }
+        XORI(s1, s1, c & 0xff); // the result itself is still computed here
+        IFX (X_PEND)
+            ST_B(s1, xEmu, offsetof(x64emu_t, res)); // store the result byte at the res field offset from xEmu
+        return;
+    }
+
+    XORI(s1, s1, c & 0xff);
+    ANDI(s1, s1, 0xff); // keep only the 8-bit result
+    CLEAR_FLAGS(s3);
+    IFX (X_SF) {
+        SRLI_D(s3, s1, 7); // s1 was masked to 8 bits above, so s3 is bit 7 of the result
+        BEQZ(s3, 8);       // offset 8: presumably skips the single ORI below when bit 7 is clear
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX (X_PEND) {
+        ST_B(s1, xEmu, offsetof(x64emu_t, res)); // store the result byte at the res field offset from xEmu
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8); // offset 8: presumably skips the single ORI below when the result is non-zero
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4); // parity handling is delegated to emit_pf (defined elsewhere)
+    }
+}
+
 // emit XOR32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
 void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
 {
@@ -429,4 +469,11 @@ void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4)
     IFX (X_PF) {
         emit_pf(dyn, ninst, s1, s3, s4);
     }
-}
\ No newline at end of file
+}
+
+// emit OR8 instruction, from s1 and constant c, store result in s1; s2 is overwritten with (c & 0xff), s3 and s4 are handed to emit_or8 as scratch
+void emit_or8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4)
+{
+    MOV32w(s2, c & 0xff); // materialize the low byte of the constant in s2
+    emit_or8(dyn, ninst, s1, s2, s3, s4); // reuse the register-register form for the OR and the flag handling
+}
diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c
index 3cf6d41d..883968f8 100644
--- a/src/dynarec/la64/dynarec_la64_emit_shift.c
+++ b/src/dynarec/la64/dynarec_la64_emit_shift.c
@@ -166,6 +166,74 @@ void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
     }
 }
 
+
+// emit SHR32 instruction: s1 = s1 >> s2 (logical); s2 must be non-zero and already masked by the caller; result stays in s1, s3 and s4 are scratch
+void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4)
+{
+    int64_t j64; // not referenced directly in this function; possibly needed by emitter macros -- TODO confirm
+
+
+    IFX (X_PEND) {
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2)); // save the shift count at the op2 field offset from xEmu
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1)); // save the unshifted operand at the op1 field offset from xEmu
+        SET_DF(s4, rex.w ? d_shr64 : d_shr32);   // presumably records the deferred-flags kind, with s4 as scratch -- confirm against SET_DF
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) { // alternate code path selected by la64_lbt (presumably: LBT extension available)
+        IFX (X_ALL) {
+            if (rex.w)
+                X64_SRL_D(s1, s2); // presumably derives the x86 flags for the 64-bit shift -- confirm against the emitter
+            else
+                X64_SRL_W(s1, s2); // same for the 32-bit shift
+        }
+        SRL_D(s1, s1, s2); // the result itself is still computed here
+        if (!rex.w) { ZEROUP(s1); }
+        IFX (X_PEND) {
+            SDxw(s1, xEmu, offsetof(x64emu_t, res)); // save the result at the res field offset from xEmu
+        }
+        return;
+    }
+
+    CLEAR_FLAGS(s3);
+    IFX (X_CF) {
+        ADDI_D(s3, s2, -1); // s3 = shift count - 1
+        SRA_D(s3, s1, s3);  // moves bit (count - 1) of the operand, the last bit shifted out, down to bit 0
+        ANDI(s3, s3, 1); // LSB == F_CF
+        OR(xFlags, xFlags, s3);
+    }
+    IFX (X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        // OF flag is set to the most-significant bit of the original operand
+        ADDI_D(s3, xZR, 1); // s3 = 1, the count to compare against
+        BEQ(s2, s3, 4 + 4 * 4); // NOTE(review): this branches away when s2 == 1, which looks inverted relative to the comment above; the offset also assumes four instructions follow, which depends on the SRLIxw expansion (not fully visible here) -- confirm
+        SRLIxw(s3, s1, rex.w ? 63 : 31); // s3 = most-significant bit of the still-unshifted operand
+        SLLI_D(s3, s3, F_OF);            // move that bit to the F_OF position
+        OR(xFlags, xFlags, s3);
+    }
+
+    SRL_D(s1, s1, s2); // the shift itself, done after CF/OF because both read the original operand
+
+    IFX (X_SF) {
+        BGE(s1, xZR, 8); // offset 8: presumably skips the single ORI below unless s1 is negative as a signed value
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res)); // save the result at the res field offset from xEmu
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8); // offset 8: presumably skips the single ORI below when the result is non-zero
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4); // parity handling is delegated to emit_pf (defined elsewhere)
+    }
+}
+
 // emit SHR32 instruction, from s1 , constant c, store result in s1 using s3 and s4 as scratch
 void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4)
 {
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 481dbb68..49229c09 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -211,8 +211,12 @@
             wb2 = (wback >> 2) * 8;                                                             \
             wback = TO_LA64(wback & 3);                                                         \
         }                                                                                       \
-        if (wb2) { SRLI_D(i, wback, wb2); }                                                     \
-        EXT_W_B(i, i);                                                                          \
+        if (wb2) {                                                                              \
+            SRLI_D(i, wback, wb2);                                                              \
+            EXT_W_B(i, i);                                                                      \
+        } else {                                                                                \
+            EXT_W_B(i, wback);                                                                  \
+        }                                                                                       \
         wb1 = 0;                                                                                \
         ed = i;                                                                                 \
     } else {                                                                                    \
@@ -652,6 +656,8 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_or32           STEPNAME(emit_or32)
 #define emit_or32c          STEPNAME(emit_or32c)
 #define emit_or8            STEPNAME(emit_or8)
+#define emit_or8c           STEPNAME(emit_or8c)
+#define emit_xor8c          STEPNAME(emit_xor8c)
 #define emit_xor32          STEPNAME(emit_xor32)
 #define emit_xor32c         STEPNAME(emit_xor32c)
 #define emit_and8           STEPNAME(emit_and8)
@@ -660,6 +666,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_and32c         STEPNAME(emit_and32c)
 #define emit_shl32          STEPNAME(emit_shl32)
 #define emit_shl32c         STEPNAME(emit_shl32c)
+#define emit_shr32          STEPNAME(emit_shr32)
 #define emit_shr32c         STEPNAME(emit_shr32c)
 #define emit_sar32c         STEPNAME(emit_sar32c)
 #define emit_ror32c         STEPNAME(emit_ror32c)
@@ -720,6 +727,8 @@ void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_or32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_or32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 void emit_or8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
+void emit_or8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4);
+void emit_xor8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s4);
 void emit_xor32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 void emit_xor32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_and8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
@@ -728,6 +737,7 @@ void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
+void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index eea358b7..49b3b985 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -247,7 +247,7 @@ f24-f31  fs0-fs7   Static registers                Callee
 // GR[rd] = SRL(GR[rj][63:0], GR[rk][5:0])
 #define SRL_D(rd, rj, rk) EMIT(type_3R(0b00000000000110010, rk, rj, rd))
 // GR[rd] = SRA(GR[rj][63:0], GR[rk][5:0])
-#define SLA_D(rd, rj, rk) EMIT(type_3R(0b00000000000110011, rk, rj, rd))
+#define SRA_D(rd, rj, rk) EMIT(type_3R(0b00000000000110011, rk, rj, rd))
 // GR[rd] = ROTR(GR[rj][63:0], GR[rk][5:0])
 #define ROTR_D(rd, rj, rk) EMIT(type_3R(0b00000000000110111, rk, rj, rd))
 
@@ -269,13 +269,25 @@ f24-f31  fs0-fs7   Static registers                Callee
 // GR[rd] = ROTR(GR[rj][31:0], imm5) (Rotate To Right)
 #define ROTRI_W(rd, rj, imm5) EMIT(type_2RI5(0b00000000010011001, imm5, rj, rd))
 
+#define SRAxw(rd, rj, rk)      \
+    do {                       \
+        if (rex.w) {           \
+            SRA_D(rd, rj, rk); \
+        } else {               \
+            SRA_W(rd, rj, rk); \
+            ZEROUP(rd);        \
+        }                      \
+    } while (0)
+
 // Shift Left Immediate
-#define SLLIxw(rd, rs1, imm)  \
-    if (rex.w) {              \
-        SLLI_D(rd, rs1, imm); \
-    } else {                  \
-        SLLI_W(rd, rs1, imm); \
-    }
+#define SLLIxw(rd, rs1, imm)      \
+    do {                          \
+        if (rex.w) {              \
+            SLLI_D(rd, rs1, imm); \
+        } else {                  \
+            SLLI_W(rd, rs1, imm); \
+        }                         \
+    } while (0)
 // Shift Right Logical Immediate
 #define SRLIxw(rd, rs1, imm)      \
     do {                          \
@@ -400,6 +412,36 @@ f24-f31  fs0-fs7   Static registers                Callee
 // ZERO the upper part
 #define ZEROUP(rd) BSTRINS_D(rd, xZR, 63, 32);
 
+#define CLO_W(rd, rj)     EMIT(type_2R(0b0000000000000000000100, rj, rd))
+#define CLZ_W(rd, rj)     EMIT(type_2R(0b0000000000000000000101, rj, rd))
+#define CTO_W(rd, rj)     EMIT(type_2R(0b0000000000000000000110, rj, rd))
+#define CTZ_W(rd, rj)     EMIT(type_2R(0b0000000000000000000111, rj, rd))
+#define CLO_D(rd, rj)     EMIT(type_2R(0b0000000000000000001000, rj, rd))
+#define CLZ_D(rd, rj)     EMIT(type_2R(0b0000000000000000001001, rj, rd))
+#define CTO_D(rd, rj)     EMIT(type_2R(0b0000000000000000001010, rj, rd))
+#define CTZ_D(rd, rj)     EMIT(type_2R(0b0000000000000000001011, rj, rd))
+#define REVB_2H(rd, rj)   EMIT(type_2R(0b0000000000000000001100, rj, rd))
+#define REVB_4H(rd, rj)   EMIT(type_2R(0b0000000000000000001101, rj, rd))
+#define REVB_2W(rd, rj)   EMIT(type_2R(0b0000000000000000001110, rj, rd))
+#define REVB_D(rd, rj)    EMIT(type_2R(0b0000000000000000001111, rj, rd))
+#define REVH_2W(rd, rj)   EMIT(type_2R(0b0000000000000000010000, rj, rd))
+#define REVH_D(rd, rj)    EMIT(type_2R(0b0000000000000000010001, rj, rd))
+#define BITREV_4B(rd, rj) EMIT(type_2R(0b0000000000000000010010, rj, rd))
+#define BITREV_8B(rd, rj) EMIT(type_2R(0b0000000000000000010011, rj, rd))
+#define BITREV_W(rd, rj)  EMIT(type_2R(0b0000000000000000010100, rj, rd))
+#define BITREV_D(rd, rj)  EMIT(type_2R(0b0000000000000000010101, rj, rd))
+
+#define REVBxw(rd, rj)       \
+    do {                     \
+        if (rex.w) {         \
+            REVB_D(rd, rj);  \
+        } else {             \
+            REVB_2W(rd, rj); \
+            ZEROUP(rd);      \
+        }                    \
+    } while (0)
+
+
 // GR[rd] = SignExtend(GR[rj][7:0], GRLEN)
 #define EXT_W_B(rd, rj) EMIT(type_2R(0b0000000000000000010111, rj, rd))