about summary refs log tree commit diff stats
path: root/src/dynarec
diff options
context:
space:
mode:
authorYang Liu <numbksco@gmail.com>2024-06-04 04:04:28 +0800
committerGitHub <noreply@github.com>2024-06-03 22:04:28 +0200
commit48bb907d2ded6237ff25a9a18043480ecd1e69df (patch)
tree738f8f37b4ad72663cad3502bb024a19585fb0b8 /src/dynarec
parentc2ce5d5557f7c0f46a65ce70b05c1b19f440d70a (diff)
downloadbox64-48bb907d2ded6237ff25a9a18043480ecd1e69df.tar.gz
box64-48bb907d2ded6237ff25a9a18043480ecd1e69df.zip
[LA64_DYNAREC] Added more opcodes (#1558)
Diffstat (limited to 'src/dynarec')
-rw-r--r--src/dynarec/la64/dynarec_la64_0f.c35
-rw-r--r--src/dynarec/la64/dynarec_la64_64.c45
-rw-r--r--src/dynarec/la64/dynarec_la64_66.c62
-rw-r--r--src/dynarec/la64/dynarec_la64_660f.c44
-rw-r--r--src/dynarec/la64/dynarec_la64_emit_math.c62
-rw-r--r--src/dynarec/la64/dynarec_la64_emit_shift.c153
-rw-r--r--src/dynarec/la64/dynarec_la64_f20f.c13
-rw-r--r--src/dynarec/la64/dynarec_la64_f30f.c21
-rw-r--r--src/dynarec/la64/dynarec_la64_helper.h6
-rw-r--r--src/dynarec/la64/la64_emitter.h8
10 files changed, 447 insertions, 2 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index e96c3437..8c0e633c 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -270,6 +270,20 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMWRITE2();
             }
             break;
+        case 0x2B:
+            INST_NAME("MOVNTPS Ex,Gx");
+            nextop = F8;
+            GETG;
+            v0 = sse_get_reg(dyn, ninst, x1, gd, 0);
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg_empty(dyn, ninst, x1, ed);
+                VOR_V(v1, v0, v0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                VST(v0, ed, fixedaddress);
+            }
+            break;
         case 0x2E:
             // no special check...
         case 0x2F:
@@ -652,6 +666,27 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             else
                 BSTRINS_D(xFlags, x4, F_CF, F_CF);
             break;
+        case 0xA4:
+            nextop = F8;
+            INST_NAME("SHLD Ed, Gd, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            GETED(1);
+            GETGD;
+            u8 = F8;
+            emit_shld32c(dyn, ninst, rex, ed, gd, u8, x3, x4);
+            WBACK;
+            break;
+        case 0xAC:
+            nextop = F8;
+            INST_NAME("SHRD Ed, Gd, Ib");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            GETED(1);
+            GETGD;
+            u8 = F8;
+            u8 &= (rex.w ? 0x3f : 0x1f);
+            emit_shrd32c(dyn, ninst, rex, ed, gd, u8, x3, x4);
+            WBACK;
+            break;
         case 0xAE:
             nextop = F8;
             if (MODREG)
diff --git a/src/dynarec/la64/dynarec_la64_64.c b/src/dynarec/la64/dynarec_la64_64.c
index 9dd03c01..46f0ef06 100644
--- a/src/dynarec/la64/dynarec_la64_64.c
+++ b/src/dynarec/la64/dynarec_la64_64.c
@@ -8,6 +8,7 @@
 #include "dynarec.h"
 #include "emu/x64emu_private.h"
 #include "emu/x64run_private.h"
+#include "la64_emitter.h"
 #include "x64run.h"
 #include "x64emu.h"
 #include "box64stack.h"
@@ -60,6 +61,15 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
     GETREX();
 
     switch (opcode) {
+        case 0x03:
+            INST_NAME("ADD Gd, Seg:Ed");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            grab_segdata(dyn, addr, ninst, x4, seg);
+            nextop = F8;
+            GETGD;
+            GETEDO(x4, 0);
+            emit_add32(dyn, ninst, rex, gd, ed, x3, x4, x5);
+            break;
         case 0x33:
             INST_NAME("XOR Gd, Seg:Ed");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -69,6 +79,20 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETEDO(x4, 0);
             emit_xor32(dyn, ninst, rex, gd, ed, x3, x4);
             break;
+        case 0x89:
+            INST_NAME("MOV Seg:Ed, Gd");
+            grab_segdata(dyn, addr, ninst, x4, seg);
+            nextop = F8;
+            GETGD;
+            if (MODREG) { // reg <= reg
+                MVxw(TO_LA64((nextop & 7) + (rex.b << 3)), gd);
+            } else { // mem <= reg
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x1, &fixedaddress, rex, NULL, 1, 0);
+                ADD_D(x4, ed, x4);
+                SDxw(gd, x4, fixedaddress);
+                SMWRITE2();
+            }
+            break;
         case 0x8B:
             INST_NAME("MOV Gd, Seg:Ed");
             grab_segdata(dyn, addr, ninst, x4, seg);
@@ -83,6 +107,27 @@ uintptr_t dynarec64_64(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 LDxw(gd, x4, fixedaddress);
             }
             break;
+        case 0xC7:
+            INST_NAME("MOV Seg:Ed, Id");
+            grab_segdata(dyn, addr, ninst, x4, seg);
+            nextop = F8;
+            if (MODREG) { // reg <= i32
+                i64 = F32S;
+                ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                MOV64xw(ed, i64);
+            } else { // mem <= i32
+                addr = geted(dyn, addr, ninst, nextop, &wback, x2, x1, &fixedaddress, rex, NULL, 1, 4);
+                i64 = F32S;
+                if (i64) {
+                    MOV64xw(x3, i64);
+                    ed = x3;
+                } else
+                    ed = xZR;
+                ADD_D(x4, wback, x4);
+                SDxw(ed, x4, fixedaddress);
+                SMWRITE2();
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index 3d27d369..8250d502 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -309,6 +309,22 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_sub16(dyn, ninst, x1, x5, x2, x4, x6);
                     EWBACK;
                     break;
+                case 6: // XOR
+                    if (opcode == 0x81) {
+                        INST_NAME("XOR Ew, Iw");
+                    } else {
+                        INST_NAME("XOR Ew, Ib");
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEW(x1, (opcode == 0x81) ? 2 : 1);
+                    if (opcode == 0x81)
+                        i16 = F16S;
+                    else
+                        i16 = F8S;
+                    MOV32w(x5, i16);
+                    emit_xor16(dyn, ninst, x1, x5, x2, x4, x6);
+                    EWBACK;
+                    break;
                 case 7: // CMP
                     if (opcode == 0x81) {
                         INST_NAME("CMP Ew, Iw");
@@ -388,6 +404,45 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 BSTRINS_D(gd, x2, 15, 0);
             }
             break;
+        case 0xA5:
+            if (rep) {
+                INST_NAME("REP MOVSW");
+                CBZ_NEXT(xRCX);
+                ANDI(x1, xFlags, 1 << F_DF);
+                BNEZ_MARK2(x1);
+                MARK; // Part with DF==0
+                LD_H(x1, xRSI, 0);
+                ST_H(x1, xRDI, 0);
+                ADDI_D(xRSI, xRSI, 2);
+                ADDI_D(xRDI, xRDI, 2);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK(xRCX);
+                B_NEXT_nocond;
+                MARK2; // Part with DF==1
+                LD_H(x1, xRSI, 0);
+                ST_H(x1, xRDI, 0);
+                ADDI_D(xRSI, xRSI, -2);
+                ADDI_D(xRDI, xRDI, -2);
+                ADDI_D(xRCX, xRCX, -1);
+                BNEZ_MARK2(xRCX);
+                // done
+            } else {
+                INST_NAME("MOVSW");
+                GETDIR(x3, x1, 2);
+                LD_H(x1, xRSI, 0);
+                ST_H(x1, xRDI, 0);
+                ADD_D(xRSI, xRSI, x3);
+                ADD_D(xRDI, xRDI, x3);
+            }
+            break;
+        case 0xA9:
+            INST_NAME("TEST AX,Iw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            u16 = F16;
+            MOV32w(x2, u16);
+            BSTRPICK_D(x1, xRAX, 15, 0);
+            emit_test16(dyn, ninst, x1, x2, x3, x4, x5);
+            break;
         case 0xAB:
             if (rep) {
                 INST_NAME("REP STOSW");
@@ -570,6 +625,13 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     MOV32w(x2, u16);
                     emit_test16(dyn, ninst, x1, x2, x3, x4, x5);
                     break;
+                case 3:
+                    INST_NAME("NEG Ew");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEW(x1, 0);
+                    emit_neg16(dyn, ninst, ed, x2, x4);
+                    EWBACK;
+                    break;
                 default:
                     DEFAULT;
             }
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 5768cc62..6f2b2490 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -395,12 +395,35 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             nextop = F8;
             GETEX(v1, 0, 0);
             GETGX_empty(v0);
-            // TODO: is there any way to support !box64_dynarec_fastround?
+            // TODO: !box64_dynarec_fastround
             q0 = fpu_get_scratch(dyn);
             VFCVT_S_D(q0, v1, v1);
             VXOR_V(v0, v0, v0);
             VEXTRINS_D(v0, q0, 0);
             break;
+        case 0x5B:
+            INST_NAME("CVTPS2DQ Gx, Ex");
+            nextop = F8;
+            GETEX(v1, 0, 0);
+            GETGX_empty(v0);
+            u8 = sse_setround(dyn, ninst, x6, x4);
+            VFTINT_W_S(v0, v1);
+            if (!box64_dynarec_fastround) {
+                q0 = fpu_get_scratch(dyn);
+                q1 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFCMP_S(q0, v1, v1, cEQ);
+                VLDI(q1, 0b1001110000000); // broadcast 0x80000000
+                VAND_V(v0, q0, v0);
+                VANDN_V(d1, q0, q1);
+                VOR_V(v0, v0, d1);
+                VSUBI_WU(d1, q1, 1);
+                VSEQ_W(q0, v0, d1);
+                VSRLI_W(q0, q0, 31);
+                VADD_W(v0, v0, q0);
+            }
+            x87_restoreround(dyn, ninst, u8);
+            break;
         case 0x5C:
             INST_NAME("SUBPD Gx, Ex");
             nextop = F8;
@@ -838,6 +861,13 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(q0, 0, 0);
             VAND_V(v0, v0, q0);
             break;
+        case 0xDC:
+            INST_NAME("PADDUSB Gx,Ex");
+            nextop = F8;
+            GETGX(q0, 1);
+            GETEX(q1, 0, 0);
+            VSADD_BU(q0, q0, q1);
+            break;
         case 0xDF:
             INST_NAME("PANDN Gx,Ex");
             nextop = F8;
@@ -852,6 +882,18 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(v1, 0, 0);
             VMUH_HU(v0, v0, v1);
             break;
+        case 0xE7:
+            INST_NAME("MOVNTDQ Ex, Gx");
+            nextop = F8;
+            GETGX(v0, 0);
+            if (MODREG) {
+                v1 = sse_get_reg_empty(dyn, ninst, x1, (nextop & 7) + (rex.b << 3));
+                VOR_V(v1, v0, v0);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                VST(v0, ed, fixedaddress);
+            }
+            break;
         case 0xEB:
             INST_NAME("POR Gx,Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index b2caede0..03ffa80f 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -1541,3 +1541,65 @@ void emit_dec32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
 }
+
+
+// emit NEG16 instruction, from s1, store result in s1 using s2 and s3 as scratch
+void emit_neg16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3)
+{
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s3, d_neg16);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+    IFX (X_AF | X_OF) {
+        MV(s3, s1); // s3 = op1
+    }
+
+    NOR(s1, s1, xZR);
+    BSTRPICK_D(s1, s1, 15, 0);
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    CLEAR_FLAGS(s3);
+    IFX (X_CF) {
+        BEQZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+
+    IFX (X_AF | X_OF) {
+        OR(s3, s1, s3); // s3 = res | op1
+        IFX (X_AF) {
+            /* af = bc & 0x8 */
+            ANDI(s2, s3, 8);
+            BEQZ(s2, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX (X_OF) {
+            /* of = ((bc >> (width-2)) ^ (bc >> (width-1))) & 0x1; */
+            SRLI_D(s2, s3, 14);
+            SRLI_D(s3, s2, 1);
+            XOR(s2, s2, s3);
+            ANDI(s2, s2, 1);
+            BEQZ(s2, 8);
+            ORI(xFlags, xFlags, 1 << F_OF);
+        }
+    }
+    IFX (X_SF) {
+        SRLI_D(s3, s1, 15 - F_SF); // put sign bit in place
+        ANDI(s3, s3, 1 << F_SF);   // 1<<F_SF is sign bit, so just mask
+        OR(xFlags, xFlags, s3);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s2);
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+
+    IFXA (X_ALL, la64_lbt) {
+        SPILL_EFLAGS();
+    }
+}
\ No newline at end of file
diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c
index cff14cc8..7cc978a7 100644
--- a/src/dynarec/la64/dynarec_la64_emit_shift.c
+++ b/src/dynarec/la64/dynarec_la64_emit_shift.c
@@ -808,4 +808,155 @@ void emit_rol32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c,
             OR(xFlags, xFlags, s3);
         }
     }
-}
\ No newline at end of file
+}
+
+
+void emit_shld32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
+{
+    c &= (rex.w ? 0x3f : 0x1f);
+
+    IFX (X_PEND) {
+        if (c) {
+            MOV64x(s3, c);
+            SDxw(s3, xEmu, offsetof(x64emu_t, op2));
+        } else
+            SDxw(xZR, xEmu, offsetof(x64emu_t, op2));
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SET_DF(s4, rex.w ? d_shld64 : d_shld32);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (!c) {
+        IFX (X_PEND) {
+            SDxw(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+
+    CLEAR_FLAGS(s3);
+    IFX (X_CF) {
+        if (c > 0) {
+            SRLI_D(s3, s1, (rex.w ? 64 : 32) - c);
+            ANDI(s4, s3, 1); // F_CF
+            OR(xFlags, xFlags, s4);
+        }
+    }
+    IFX (X_OF) {
+        // Store sign for later use.
+        if (c == 1) SRLIxw(s4, s1, rex.w ? 63 : 31);
+    }
+
+    SLLIxw(s3, s1, c);
+    SRLIxw(s1, s2, (rex.w ? 64 : 32) - c);
+    OR(s1, s1, s3);
+
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX (X_SF) {
+        SRLIxw(s3, s1, rex.w ? 63 : 31);
+        BEQZ(s3, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_OF) {
+        // the OF flag is set if a sign change occurred
+        if (c == 1) {
+            SRLIxw(s3, s1, rex.w ? 63 : 31);
+            XOR(s3, s3, s4);
+            SLLI_D(s3, s3, F_OF);
+            ORI(xFlags, xFlags, s3);
+        }
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+
+    IFXA (X_ALL, la64_lbt) {
+        SPILL_EFLAGS();
+    }
+}
+
+
// emit SHRD32 instruction, from s1, fill s2 , constant c, store result in s1 using s3 and s4 as scratch
// Computes s1 = (s1 >> c) | (s2 << (width - c)), width = rex.w ? 64 : 32.
// x86 SHRD flag semantics: CF = last bit shifted out (bit c-1 of the original
// s1); OF (sign change) is only computed for c == 1; SF/ZF/PF from the result.
void emit_shrd32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4)
{
    c &= (rex.w ? 0x3f : 0x1f);

    IFX (X_PEND) {
        if (c) {
            MOV64x(s3, c);
            SDxw(s3, xEmu, offsetof(x64emu_t, op2));
        } else
            SDxw(xZR, xEmu, offsetof(x64emu_t, op2));
        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
        SET_DF(s4, rex.w ? d_shrd64 : d_shrd32);
    } else IFX (X_ALL) {
        SET_DFNONE();
    }
    // shift by 0: value and flags unchanged
    if (!c) {
        IFX (X_PEND) {
            SDxw(s1, xEmu, offsetof(x64emu_t, res));
        }
        return;
    }


    CLEAR_FLAGS(s3);
    IFX (X_CF) {
        if (c > 1) {
            // Extract bit (c-1); the arithmetic shift is harmless since only
            // bit 0 is kept by the ANDI below.
            SRAI_D(s3, s1, c - 1);
            ANDI(s3, s3, 1); // LSB == F_CF
        } else {
            ANDI(s3, s1, 1); // LSB == F_CF
        }
        OR(xFlags, xFlags, s3);
    }
    IFX (X_OF) {
        // Store the pre-shift sign for the sign-change test below.
        if (c == 1) SRLIxw(s4, s1, rex.w ? 63 : 31);
    }

    SRLIxw(s3, s1, c);
    SLLIxw(s1, s2, (rex.w ? 64 : 32) - c);
    OR(s1, s1, s3);

    if (!rex.w) {
        ZEROUP(s1); // keep the 32-bit result zero-extended in the host register
    }
    IFX (X_SF) {
        SRLIxw(s3, s1, rex.w ? 63 : 31);
        BEQZ(s3, 8);
        ORI(xFlags, xFlags, 1 << F_SF);
    }
    IFX (X_PEND) {
        SDxw(s1, xEmu, offsetof(x64emu_t, res));
    }
    IFX (X_ZF) {
        BNEZ(s1, 8);
        ORI(xFlags, xFlags, 1 << F_ZF);
    }
    IFX (X_OF) {
        // the OF flag is set if a sign change occurred
        if (c == 1) {
            // s1 is zero-extended when !rex.w, so SRLI_D by 31 yields the sign bit.
            SRLI_D(s3, s1, rex.w ? 63 : 31);
            XOR(s3, s3, s4); // s4 still holds the pre-shift sign
            SLLI_D(s3, s3, F_OF);
            OR(xFlags, xFlags, s3);
        }
    }
    IFX (X_PF) {
        emit_pf(dyn, ninst, s1, s3, s4);
    }

    IFXA (X_ALL, la64_lbt) {
        SPILL_EFLAGS();
    }
}
diff --git a/src/dynarec/la64/dynarec_la64_f20f.c b/src/dynarec/la64/dynarec_la64_f20f.c
index eb03439f..6d0a5107 100644
--- a/src/dynarec/la64/dynarec_la64_f20f.c
+++ b/src/dynarec/la64/dynarec_la64_f20f.c
@@ -214,6 +214,19 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             MARK;
             VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
+        case 0x5D:
+            INST_NAME("MINSD Gx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEXSD(v1, 0, 0);
+            FCMP_D(fcc0, v0, v1, cUN);
+            BCNEZ_MARK(fcc0);
+            FCMP_D(fcc1, v1, v0, cLE);
+            BCEQZ_MARK2(fcc1);
+            MARK;
+            VEXTRINS_D(v0, v1, 0);
+            MARK2;
+            break;
         case 0x5E:
             INST_NAME("DIVSD Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c
index 5e1f0940..1d147a0a 100644
--- a/src/dynarec/la64/dynarec_la64_f30f.c
+++ b/src/dynarec/la64/dynarec_la64_f30f.c
@@ -165,6 +165,27 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             FCVT_D_S(d1, v1);
             VEXTRINS_D(v0, d1, 0);
             break;
+        case 0x5B:
+            INST_NAME("CVTTPS2DQ Gx, Ex");
+            nextop = F8;
+            GETEX(v1, 0, 0);
+            GETGX_empty(v0);
+            VFTINTRZ_W_S(v0, v1);
+            if (!box64_dynarec_fastround) {
+                q0 = fpu_get_scratch(dyn);
+                q1 = fpu_get_scratch(dyn);
+                d1 = fpu_get_scratch(dyn);
+                VFCMP_S(q0, v1, v1, cEQ);
+                VLDI(q1, 0b1001110000000); // broadcast 0x80000000
+                VAND_V(v0, q0, v0);
+                VANDN_V(d1, q0, q1);
+                VOR_V(v0, v0, d1);
+                VSUBI_WU(d1, q1, 1);
+                VSEQ_W(q0, v0, d1);
+                VSRLI_W(q0, q0, 31);
+                VADD_W(v0, v0, q0);
+            }
+            break;
         case 0x5C:
             INST_NAME("SUBSS Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index 9302d252..def57335 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -779,6 +779,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_sbb16          STEPNAME(emit_sbb16)
 #define emit_sbb32          STEPNAME(emit_sbb32)
 #define emit_neg8           STEPNAME(emit_neg8)
+#define emit_neg16          STEPNAME(emit_neg16)
 #define emit_neg32          STEPNAME(emit_neg32)
 #define emit_inc8           STEPNAME(emit_inc8)
 #define emit_inc16          STEPNAME(emit_inc16)
@@ -810,6 +811,8 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_shr32c         STEPNAME(emit_shr32c)
 #define emit_sar16          STEPNAME(emit_sar16)
 #define emit_sar32c         STEPNAME(emit_sar32c)
+#define emit_shld32c        STEPNAME(emit_shld32c)
+#define emit_shrd32c        STEPNAME(emit_shrd32c)
 #define emit_ror32c         STEPNAME(emit_ror32c)
 #define emit_rol32          STEPNAME(emit_rol32)
 #define emit_rol32c         STEPNAME(emit_rol32c)
@@ -877,6 +880,7 @@ void emit_sbb8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s
 void emit_sbb16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_sbb32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_neg8(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
+void emit_neg16(dynarec_la64_t* dyn, int ninst, int s1, int s3, int s4);
 void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3);
 void emit_inc8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_inc16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
@@ -908,6 +912,8 @@ void emit_shr32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
 void emit_shr32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_sar16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_sar32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
+void emit_shld32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
+void emit_shrd32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, uint32_t c, int s3, int s4);
 void emit_ror32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
 void emit_rol32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_rol32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4);
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 83f5719c..e9756faf 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -919,6 +919,14 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VSUB_W(vd, vj, vk)          EMIT(type_3R(0b01110000000011010, vk, vj, vd))
 #define VSUB_D(vd, vj, vk)          EMIT(type_3R(0b01110000000011011, vk, vj, vd))
 #define VSUB_Q(vd, vj, vk)          EMIT(type_3R(0b01110001001011011, vk, vj, vd))
+#define VADDI_BU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100010100, imm5, vj, vd))
+#define VADDI_HU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100010101, imm5, vj, vd))
+#define VADDI_WU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100010110, imm5, vj, vd))
+#define VADDI_DU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100010111, imm5, vj, vd))
+#define VSUBI_BU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100011000, imm5, vj, vd))
+#define VSUBI_HU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100011001, imm5, vj, vd))
+#define VSUBI_WU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100011010, imm5, vj, vd))
+#define VSUBI_DU(vd, vj, imm5)      EMIT(type_2RI5(0b01110010100011011, imm5, vj, vd))
 #define VSADD_B(vd, vj, vk)         EMIT(type_3R(0b01110000010001100, vk, vj, vd))
 #define VSADD_H(vd, vj, vk)         EMIT(type_3R(0b01110000010001101, vk, vj, vd))
 #define VSADD_W(vd, vj, vk)         EMIT(type_3R(0b01110000010001110, vk, vj, vd))