about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author	Yang Liu <numbksco@gmail.com>	2024-05-04 03:18:42 +0800
committer	GitHub <noreply@github.com>	2024-05-03 21:18:42 +0200
commit	1501592fa87cde05f22ea7ec3b2587ffe51134f9 (patch)
tree	7fb03c7c64215393706098456a8ab7e897b9f8c7 /src
parent	12467916a4ce5bf8042324f0dbb10a93da41d834 (diff)
download	box64-1501592fa87cde05f22ea7ec3b2587ffe51134f9.tar.gz
	box64-1501592fa87cde05f22ea7ec3b2587ffe51134f9.zip
[LA64_DYNAREC] Added more opcodes (#1491)
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/arm64/dynarec_arm64_0f.c       |  6
-rw-r--r--  src/dynarec/la64/dynarec_la64_00.c         | 23
-rw-r--r--  src/dynarec/la64/dynarec_la64_0f.c         | 75
-rw-r--r--  src/dynarec/la64/dynarec_la64_66.c         | 76
-rw-r--r--  src/dynarec/la64/dynarec_la64_660f.c       | 33
-rw-r--r--  src/dynarec/la64/dynarec_la64_emit_math.c  | 98
-rw-r--r--  src/dynarec/la64/dynarec_la64_emit_shift.c | 64
-rw-r--r--  src/dynarec/la64/dynarec_la64_f20f.c       | 68
-rw-r--r--  src/dynarec/la64/dynarec_la64_helper.c     | 37
-rw-r--r--  src/dynarec/la64/dynarec_la64_helper.h     | 32
-rw-r--r--  src/dynarec/la64/la64_emitter.h            |  6
11 files changed, 512 insertions, 6 deletions
diff --git a/src/dynarec/arm64/dynarec_arm64_0f.c b/src/dynarec/arm64/dynarec_arm64_0f.c
index 37ce5743..82669dcc 100644
--- a/src/dynarec/arm64/dynarec_arm64_0f.c
+++ b/src/dynarec/arm64/dynarec_arm64_0f.c
@@ -657,7 +657,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     GETEM(q1, 0);
                     ABS_32(q0, q1);
                     break;
-                
+
                 case 0xC8:
                     INST_NAME("SHA1NEXTE Gx, Ex");
                     nextop = F8;
@@ -900,7 +900,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                                 v1 = v0;
                             else
                                 v1 = fpu_get_scratch(dyn);
-                        } else 
+                        } else
                             v1 = q1;
                         if(v1!=v0) {
                             VEXTQ_8(v1, q1, q1, 8);
@@ -1916,7 +1916,7 @@ uintptr_t dynarec64_0F(dynarec_arm_t* dyn, uintptr_t addr, uintptr_t ip, int nin
                     MOVxw_REG(xRAX, x1);    // upper par of RAX will be erase on 32bits, no mater what
                 }
                 break;
-                
+
         case 0xB3:
             INST_NAME("BTR Ed, Gd");
             SETFLAGS(X_CF, SF_SUBSET);
diff --git a/src/dynarec/la64/dynarec_la64_00.c b/src/dynarec/la64/dynarec_la64_00.c
index 6e5ec591..3eb172ab 100644
--- a/src/dynarec/la64/dynarec_la64_00.c
+++ b/src/dynarec/la64/dynarec_la64_00.c
@@ -655,6 +655,23 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     emit_or32c(dyn, ninst, rex, ed, i64, x3, x4);
                     WBACK;
                     break;
+                case 2: // ADC
+                    if (opcode == 0x81) {
+                        INST_NAME("ADC Ed, Id");
+                    } else {
+                        INST_NAME("ADC Ed, Ib");
+                    }
+                    READFLAGS(X_CF);
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETED((opcode == 0x81) ? 4 : 1);
+                    if (opcode == 0x81)
+                        i64 = F32S;
+                    else
+                        i64 = F8S;
+                    MOV64xw(x5, i64);
+                    emit_adc32(dyn, ninst, rex, ed, x5, x3, x4, x6, x1);
+                    WBACK;
+                    break;
                 case 4: // AND
                     if (opcode == 0x81) {
                         INST_NAME("AND Ed, Id");
@@ -1811,6 +1828,12 @@ uintptr_t dynarec64_00(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                     MOV32w(x2, u8);
                     emit_test8(dyn, ninst, x1, x2, x3, x4, x5);
                     break;
+                case 2:
+                    INST_NAME("NOT Eb");
+                    GETEB(x1, 0);
+                    NOR(x1, x1, xZR);
+                    EBBACK();
+                    break;
                 case 3:
                     INST_NAME("NEG Eb");
                     SETFLAGS(X_ALL, SF_SET_PENDING);
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 6fb729f4..8ddae253 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -104,6 +104,22 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             LOAD_XEMU_REM();
             jump_to_epilog(dyn, 0, xRIP, ninst);
             break;
+        case 0x10:
+            INST_NAME("MOVUPS Gx,Ex");
+            nextop = F8;
+            GETG;
+            if (MODREG) {
+                ed = (nextop & 7) + (rex.b << 3);
+                v1 = sse_get_reg(dyn, ninst, x1, ed, 0);
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                VOR_V(v0, v1, v1);
+            } else {
+                v0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                SMREAD();
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                VLD(v0, ed, fixedaddress);
+            }
+            break;
         case 0x11:
             INST_NAME("MOVUPS Ex,Gx");
             nextop = F8;
@@ -142,6 +158,14 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGX(v0, 1);
             VILVL_W(v0, q0, v0);
             break;
+        case 0x15:
+            INST_NAME("UNPCKHPS Gx, Ex");
+            nextop = F8;
+            SMREAD();
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VILVH_W(v0, q0, v0);
+            break;
         case 0x16:
             nextop = F8;
             if (MODREG) {
@@ -158,6 +182,21 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             }
             VILVL_D(v0, v1, v0); // v0[127:64] = v1[63:0]
             break;
+        case 0x17:
+            nextop = F8;
+            INST_NAME("MOVHPS Ex,Gx");
+            GETGX(v0, 0);
+            if(MODREG) {
+                v1 = sse_get_reg(dyn, ninst, x1, (nextop & 7) + (rex.b << 3), 1);
+                VEXTRINS_D(v1, v0, 0x01);
+            } else {
+                addr = geted(dyn, addr, ninst, nextop, &ed, x2, x3, &fixedaddress, rex, NULL, 1, 0);
+                v1 = fpu_get_scratch(dyn);
+                VEXTRINS_D(v1, v0, 0x01);
+                FST_D(v1, ed, fixedaddress);
+                SMWRITE2();
+            }
+            break;
         case 0x18:
             nextop = F8;
             if ((nextop & 0xC0) == 0xC0) {
@@ -283,6 +322,21 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
 
         #undef GO
 
+        case 0x51:
+            INST_NAME("SQRTPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX_empty(v0);
+            VFSQRT_S(v0, q0);
+            break;
+        case 0x52:
+            INST_NAME("RSQRTPS Gx, Ex");
+            nextop = F8;
+            SKIPTEST(x1);
+            GETEX(q0, 0, 0);
+            GETGX_empty(q1);
+            VFRSQRT_S(q1, q0);
+            break;
         case 0x54:
             INST_NAME("ANDPS Gx, Ex");
             nextop = F8;
@@ -318,6 +372,20 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 VXOR_V(q0, q0, q1);
             }
             break;
+        case 0x58:
+            INST_NAME("ADDPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VFADD_S(v0, v0, q0);
+            break;
+        case 0x59:
+            INST_NAME("MULPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VFMUL_S(v0, v0, q0);
+            break;
         case 0x5A:
             INST_NAME("CVTPS2PD Gx, Ex");
             nextop = F8;
@@ -325,6 +393,13 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             GETGX(q1, 1);
             VFCVTL_D_S(q1, q0);
             break;
+        case 0x5C:
+            INST_NAME("SUBPS Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VFSUB_S(v0, v0, q0);
+            break;
 
         #define GO(GETFLAGS, NO, YES, F, I)                                                         \
             if (box64_dynarec_test == 2) { NOTEST(x1); }                                            \
diff --git a/src/dynarec/la64/dynarec_la64_66.c b/src/dynarec/la64/dynarec_la64_66.c
index fefd7056..3dd04ede 100644
--- a/src/dynarec/la64/dynarec_la64_66.c
+++ b/src/dynarec/la64/dynarec_la64_66.c
@@ -58,6 +58,15 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
         return dynarec64_00(dyn, addr - 1, ip, ninst, rex, rep, ok, need_epilog);         // addr-1, to "put back" opcode
 
     switch (opcode) {
+        case 0x01:
+            INST_NAME("ADD Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x2);
+            GETEW(x1, 0);
+            emit_add16(dyn, ninst, x1, x2, x4, x5, x6);
+            EWBACK;
+            break;
         case 0x03:
             INST_NAME("ADD Gw, Ew");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -67,6 +76,15 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_add16(dyn, ninst, x1, x2, x3, x4, x6);
             GWBACK;
             break;
+        case 0x05:
+            INST_NAME("ADD AX, Iw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            i32 = F16;
+            BSTRPICK_D(x1, xRAX, 15, 0);
+            MOV32w(x2, i32);
+            emit_add16(dyn, ninst, x1, x2, x3, x4, x6);
+            BSTRINS_D(xRAX, x1, 15, 0);
+            break;
         case 0x09:
             INST_NAME("OR Ew, Gw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -93,6 +111,24 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
             emit_sbb16(dyn, ninst, x1, x2, x4, x5, x6);
             EWBACK;
             break;
+        case 0x21:
+            INST_NAME("AND Ew, Gw");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x2);
+            GETEW(x1, 0);
+            emit_and16(dyn, ninst, x1, x2, x4, x5);
+            EWBACK;
+            break;
+        case 0x23:
+            INST_NAME("AND Gw, Ew");
+            SETFLAGS(X_ALL, SF_SET_PENDING);
+            nextop = F8;
+            GETGW(x1);
+            GETEW(x2, 0);
+            emit_and16(dyn, ninst, x1, x2, x3, x4);
+            GWBACK;
+            break;
         case 0x25:
             INST_NAME("AND AX, Iw");
             SETFLAGS(X_ALL, SF_SET_PENDING);
@@ -411,6 +447,47 @@ uintptr_t dynarec64_66(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMWRITELOCK(lock);
             }
             break;
+        case 0xD1:
+        case 0xD3:
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 4:
+                case 6:
+                    if (opcode == 0xD1) {
+                        INST_NAME("SHL Ew, 1");
+                        MOV32w(x2, 1);
+                    } else {
+                        INST_NAME("SHL Ew, CL");
+                        ANDI(x2, xRCX, 0x1f);
+                        BEQ_NEXT(x2, xZR);
+                    }
+                    SETFLAGS(X_ALL, SF_SET_PENDING); // some flags are left undefined
+                    if (box64_dynarec_safeflags > 1)
+                        MAYSETFLAGS();
+                    GETEW(x1, 0);
+                    emit_shl16(dyn, ninst, x1, x2, x5, x4, x6);
+                    EWBACK;
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
+        case 0xF7:
+            nextop = F8;
+            switch ((nextop >> 3) & 7) {
+                case 0:
+                case 1:
+                    INST_NAME("TEST Ew, Iw");
+                    SETFLAGS(X_ALL, SF_SET_PENDING);
+                    GETEW(x1, 2);
+                    u16 = F16;
+                    MOV32w(x2, u16);
+                    emit_test16(dyn, ninst, x1, x2, x3, x4, x5);
+                    break;
+                default:
+                    DEFAULT;
+            }
+            break;
         default:
             DEFAULT;
     }
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 3064397f..2929118a 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -248,6 +248,27 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                     DEFAULT;
             }
             break;
+        case 0x54:
+            INST_NAME("ANDPD Gx, Ex");
+            nextop = F8;
+            GETEX(q0, 0, 0);
+            GETGX(v0, 1);
+            VAND_V(v0, v0, q0);
+            break;
+        case 0x57:
+            INST_NAME("XORPD Gx, Ex");
+            nextop = F8;
+            GETG;
+            if (MODREG && ((nextop & 7) + (rex.b << 3) == gd)) {
+                // special case for XORPD Gx, Gx
+                q0 = sse_get_reg_empty(dyn, ninst, x1, gd);
+                VXOR_V(q0, q0, q0);
+            } else {
+                q0 = sse_get_reg(dyn, ninst, x1, gd, 1);
+                GETEX(q1, 0, 0);
+                VXOR_V(q0, q0, q1);
+            }
+            break;
         case 0x5A:
             INST_NAME("CVTPD2PS Gx, Ex");
             nextop = F8;
@@ -601,6 +622,18 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SMWRITE2();
             }
             break;
+        case 0xAF:
+            INST_NAME("IMUL Gw,Ew");
+            SETFLAGS(X_ALL, SF_PENDING);
+            nextop = F8;
+            UFLAG_DF(x1, d_imul16);
+            GETSEW(x1, 0);
+            GETSGW(x2);
+            MUL_W(x2, x2, x1);
+            UFLAG_RES(x2);
+            BSTRPICK_D(x2, x2, 15, 0);
+            GWBACK;
+            break;
         case 0xBE:
             INST_NAME("MOVSX Gw, Eb");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index d9958f27..bee0acfa 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -1024,3 +1024,101 @@ void emit_neg32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s
         ORI(xFlags, xFlags, 1 << F_ZF);
     }
 }
+
+// emit ADC32 instruction, from s1, s2, store result in s1 using s3 and s4 as scratch
+void emit_adc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6)
+{
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, op1));
+        SDxw(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s3, rex.w ? d_adc64 : d_adc32b);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) {
+        IFX (X_ALL) {
+            if (rex.w)
+                X64_ADC_D(s1, s2);
+            else
+                X64_ADC_W(s1, s2);
+        }
+        if (rex.w)
+            ADC_D(s1, s1, s2);
+        else
+            ADC_W(s1, s1, s2);
+
+        IFX (X_PEND) {
+            SDxw(s1, xEmu, offsetof(x64emu_t, res));
+        }
+        return;
+    }
+
+    IFX (X_CF) {
+        if (rex.w) {
+            AND(s5, xMASK, s1);
+            AND(s4, xMASK, s2);
+            ADD_D(s5, s5, s4); // lo
+            SRLI_D(s3, s1, 0x20);
+            SRLI_D(s4, s2, 0x20);
+            ADD_D(s4, s4, s3);
+            SRLI_D(s5, s5, 0x20);
+            ADD_D(s5, s5, s4); // hi
+            SRAI_D(s6, s5, 0x20);
+        } else {
+            AND(s3, s1, xMASK);
+            AND(s4, s2, xMASK);
+            ADD_D(s5, s3, s4);
+            SRLI_D(s6, s5, 0x20);
+        }
+    }
+    IFX (X_AF | X_OF) {
+        OR(s4, s1, s2);  // s4 = op1 | op2
+        AND(s5, s1, s2); // s5 = op1 & op2
+    }
+
+    ADDxw(s1, s1, s2);
+    ANDI(s3, xFlags, 1 << F_CF);
+    ADDxw(s1, s1, s3);
+
+    IFX (X_PEND) {
+        SDxw(s1, xEmu, offsetof(x64emu_t, res));
+    }
+
+    CLEAR_FLAGS(s3);
+    IFX (X_CF) {
+        BEQZ(s6, 8);
+        ORI(xFlags, xFlags, 1 << F_CF);
+    }
+    IFX (X_AF | X_OF) {
+        ANDN(s3, s4, s1); // s3 = ~res & (op1 | op2)
+        OR(s3, s3, s5);   // cc = (~res & (op1 | op2)) | (op1 & op2)
+        IFX (X_AF) {
+            ANDI(s4, s3, 0x08); // AF: cc & 0x08
+            BEQZ(s4, 8);
+            ORI(xFlags, xFlags, 1 << F_AF);
+        }
+        IFX (X_OF) {
+            SRLI_D(s3, s3, rex.w ? 62 : 30);
+            SRLI_D(s4, s3, 1);
+            XOR(s3, s3, s4);
+            ANDI(s3, s3, 1); // OF: xor of two MSB's of cc
+            BEQZ(s3, 8);
+            ORI(xFlags, xFlags, 1 << F_OF);
+        }
+    }
+    IFX (X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    if (!rex.w) {
+        ZEROUP(s1);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+}
diff --git a/src/dynarec/la64/dynarec_la64_emit_shift.c b/src/dynarec/la64/dynarec_la64_emit_shift.c
index 6bea6301..9820a754 100644
--- a/src/dynarec/la64/dynarec_la64_emit_shift.c
+++ b/src/dynarec/la64/dynarec_la64_emit_shift.c
@@ -22,6 +22,70 @@
 #include "dynarec_la64_functions.h"
 #include "dynarec_la64_helper.h"
 
+// emit SHL16 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
+void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5)
+{
+    // s2 is not 0 here and is 1..1f/3f
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, op1));
+        ST_H(s2, xEmu, offsetof(x64emu_t, op2));
+        SET_DF(s4, d_shl16);
+    } else IFX (X_ALL) {
+        SET_DFNONE();
+    }
+
+    if (la64_lbt) {
+        IFX (X_ALL) {
+            X64_SLL_H(s1, s2);
+        }
+        SLL_D(s1, s1, s2);
+        BSTRPICK_D(s1, s1, 15, 0);
+        IFX (X_PEND) {
+            ST_H(s1, xEmu, offsetof(x64emu_t, res));
+        }
+
+        return;
+    }
+
+    SLL_D(s1, s1, s2);
+
+    CLEAR_FLAGS(s3);
+    IFX (X_CF | X_OF) {
+        SRLI_D(s5, s1, 16);
+        ANDI(s5, s5, 1); // LSB == F_CF
+        IFX (X_CF) {
+            OR(xFlags, xFlags, s5);
+        }
+    }
+
+    SLLI_D(s1, s1, 48);
+    IFX (X_SF) {
+        BGE(s1, xZR, 8);
+        ORI(xFlags, xFlags, 1 << F_SF);
+    }
+    SRLI_D(s1, s1, 48);
+
+    IFX (X_PEND) {
+        ST_H(s1, xEmu, offsetof(x64emu_t, res));
+    }
+    IFX (X_ZF) {
+        BNEZ(s1, 8);
+        ORI(xFlags, xFlags, 1 << F_ZF);
+    }
+    IFX (X_OF) {
+        // OF flag is affected only on 1-bit shifts
+        ADDI_D(s3, s2, -1);
+        BNEZ(s3, 4 + 4 * 4);
+        SRLI_D(s3, s1, 15);
+        XOR(s3, s3, s5);
+        SLLI_D(s3, s3, F_OF);
+        OR(xFlags, xFlags, s3);
+    }
+    IFX (X_PF) {
+        emit_pf(dyn, ninst, s1, s3, s4);
+    }
+}
+
 // emit SHL32 instruction, from s1 , shift s2, store result in s1 using s3, s4 and s5 as scratch
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5)
 {
diff --git a/src/dynarec/la64/dynarec_la64_f20f.c b/src/dynarec/la64/dynarec_la64_f20f.c
index 903d7e58..e065c569 100644
--- a/src/dynarec/la64/dynarec_la64_f20f.c
+++ b/src/dynarec/la64/dynarec_la64_f20f.c
@@ -93,6 +93,65 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             }
             VEXTRINS_D(v0, d1, 0);
             break;
+        case 0x2C:
+            INST_NAME("CVTTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSD(q0, 0, 0);
+            if (!box64_dynarec_fastround) {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+            }
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                FTINTRZ_L_D(d1, q0);
+                MOVFR2GR_D(gd, d1);
+            } else {
+                FTINTRZ_W_D(d1, q0);
+                MOVFR2GR_S(gd, d1);
+            }
+            if (!rex.w) ZEROUP(gd);
+            if (!box64_dynarec_fastround) {
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                CBZ_NEXT(x5);
+                if (rex.w) {
+                    MOV64x(gd, 0x8000000000000000LL);
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
+        case 0x2D:
+            INST_NAME("CVTSD2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSD(q0, 0, 0);
+            if (!box64_dynarec_fastround) {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+            }
+            d1 = fpu_get_scratch(dyn);
+            u8 = sse_setround(dyn, ninst, x2, x3);
+            if (rex.w) {
+                FTINT_L_D(d1, q0);
+                MOVFR2GR_D(gd, d1);
+            } else {
+                FTINT_W_D(d1, q0);
+                MOVFR2GR_S(gd, d1);
+            }
+            x87_restoreround(dyn, ninst, u8);
+            if (!box64_dynarec_fastround) {
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                CBZ_NEXT(x5);
+                if (rex.w) {
+                    MOV64x(gd, 0x8000000000000000LL);
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
         case 0x58:
             INST_NAME("ADDSD Gx, Ex");
             nextop = F8;
@@ -127,6 +186,15 @@ uintptr_t dynarec64_F20F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             MARK;
             VEXTRINS_D(v0, d0, 0); // v0[63:0] = d0[63:0]
             break;
+        case 0x5A:
+            INST_NAME("CVTSD2SS Gx, Ex");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEXSD(d0, 0, 0);
+            d1 = fpu_get_scratch(dyn);
+            FCVT_S_D(d1, d0);
+            VEXTRINS_W(v0, d1, 0);
+            break;
         case 0x5C:
             INST_NAME("SUBSD Gx, Ex");
             nextop = F8;
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index 8965002d..9c59e59c 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -585,6 +585,43 @@ void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st)
     // TODO
 }
 
+// Set rounding according to mxcsr flags, return reg to restore flags
+int sse_setround(dynarec_la64_t* dyn, int ninst, int s1, int s2)
+{
+    MAYUSE(dyn);
+    MAYUSE(ninst);
+    MAYUSE(s1);
+    MAYUSE(s2);
+    LD_W(s1, xEmu, offsetof(x64emu_t, mxcsr));
+    SRLI_D(s1, s1, 13);
+    ANDI(s1, s1, 0b11);
+    // MMX/x87 Round mode: 0..3: Nearest, Down, Up, Chop
+    // LA64: 0..3: Nearest, TowardZero, TowardsPositive, TowardsNegative
+    // 0->0, 1->3, 2->2, 3->1
+    BEQ(s1, xZR, 32);
+    ADDI_D(s2, xZR, 2);
+    BEQ(s1, s2, 24);
+    ADDI_D(s2, xZR, 3);
+    BEQ(s1, s2, 12);
+    ADDI_D(s1, xZR, 3);
+    B(8);
+    ADDI_D(s1, xZR, 1);
+    // done
+    SLLI_D(s1, s1, 8);
+    MOVFCSR2GR(s2, FCSR3);
+    MOVGR2FCSR(FCSR3, s1); // exange RM with current
+    return s2;
+}
+
+// Restore round flag
+void x87_restoreround(dynarec_la64_t* dyn, int ninst, int s1)
+{
+    MAYUSE(dyn);
+    MAYUSE(ninst);
+    MAYUSE(s1);
+    MOVGR2FCSR(FCSR3, s1);
+}
+
 // SSE / SSE2 helpers
 // get lsx register for a SSE reg, create the entry if needed
 int sse_get_reg(dynarec_la64_t* dyn, int ninst, int s1, int a, int forwrite)
diff --git a/src/dynarec/la64/dynarec_la64_helper.h b/src/dynarec/la64/dynarec_la64_helper.h
index d7bf5efd..bebc24c6 100644
--- a/src/dynarec/la64/dynarec_la64_helper.h
+++ b/src/dynarec/la64/dynarec_la64_helper.h
@@ -176,6 +176,11 @@
         addr = fakeed(dyn, addr, ninst, nextop); \
     }
 
+// GETGW extract x64 register in gd, that is i, Signed extented
+#define GETSGW(i)                                        \
+    gd = TO_LA64(((nextop & 0x38) >> 3) + (rex.r << 3)); \
+    EXT_W_H(i, gd);                                      \
+    gd = i;
 
 // Write back ed in wback (if wback not 0)
 #define WBACK                              \
@@ -186,7 +191,20 @@
             ST_W(ed, wback, fixedaddress); \
         SMWRITE();                         \
     }
-
+// GETSEW will use i for ed, and can use r3 for wback. This is the Signed version
+#define GETSEW(i, D)                                                                           \
+    if (MODREG) {                                                                              \
+        wback = TO_LA64((nextop & 7) + (rex.b << 3));                                          \
+        EXT_W_H(i, wback);                                                                     \
+        ed = i;                                                                                \
+        wb1 = 0;                                                                               \
+    } else {                                                                                   \
+        SMREAD();                                                                              \
+        addr = geted(dyn, addr, ninst, nextop, &wback, x3, i, &fixedaddress, rex, NULL, 1, D); \
+        LD_H(i, wback, fixedaddress);                                                          \
+        ed = i;                                                                                \
+        wb1 = 1;                                                                               \
+    }
 // Write w back to original register / memory (w needs to be 16bits only!)
 #define EWBACKW(w)                    \
     if (wb1) {                        \
@@ -734,6 +752,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_add8           STEPNAME(emit_add8)
 #define emit_add8c          STEPNAME(emit_add8c)
 #define emit_add16          STEPNAME(emit_add16)
+#define emit_adc32          STEPNAME(emit_adc32)
 #define emit_sub16          STEPNAME(emit_sub16)
 #define emit_sub32          STEPNAME(emit_sub32)
 #define emit_sub32c         STEPNAME(emit_sub32c)
@@ -760,6 +779,7 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 #define emit_and16          STEPNAME(emit_and16)
 #define emit_and32          STEPNAME(emit_and32)
 #define emit_and32c         STEPNAME(emit_and32c)
+#define emit_shl16          STEPNAME(emit_shl16)
 #define emit_shl32          STEPNAME(emit_shl32)
 #define emit_shl32c         STEPNAME(emit_shl32c)
 #define emit_shr8           STEPNAME(emit_shr8)
@@ -771,7 +791,8 @@ void* la64_next(x64emu_t* emu, uintptr_t addr);
 
 #define emit_pf STEPNAME(emit_pf)
 
-
+#define x87_restoreround  STEPNAME(x87_restoreround)
+#define sse_setround      STEPNAME(sse_setround)
 #define x87_forget       STEPNAME(x87_forget)
 #define sse_purge07cache STEPNAME(sse_purge07cache)
 #define sse_get_reg       STEPNAME(sse_get_reg)
@@ -818,6 +839,7 @@ void emit_add32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, i
 void emit_add8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_add8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s2, int s3, int s4);
 void emit_add16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
+void emit_adc32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5, int s6);
 void emit_sub16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_sub32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_sub32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s2, int s3, int s4, int s5);
@@ -844,6 +866,7 @@ void emit_and8c(dynarec_la64_t* dyn, int ninst, int s1, int32_t c, int s3, int s
 void emit_and16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4);
 void emit_and32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4);
 void emit_and32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int64_t c, int s3, int s4);
+void emit_shl16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, int s2, int s3, int s4, int s5);
 void emit_shl32c(dynarec_la64_t* dyn, int ninst, rex_t rex, int s1, uint32_t c, int s3, int s4, int s5);
 void emit_shr8(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3, int s4, int s5);
@@ -866,7 +889,10 @@ void fpu_reflectcache(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3);
 void fpu_unreflectcache(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3);
 void fpu_pushcache(dynarec_la64_t* dyn, int ninst, int s1, int not07);
 void fpu_popcache(dynarec_la64_t* dyn, int ninst, int s1, int not07);
-
+// Restore round flag
+void x87_restoreround(dynarec_la64_t* dyn, int ninst, int s1);
+// Set rounding according to mxcsr flags, return reg to restore flags
+int sse_setround(dynarec_la64_t* dyn, int ninst, int s1, int s2);
 // refresh a value from the cache ->emu and then forget the cache (nothing done if value is not cached)
 void x87_forget(dynarec_la64_t* dyn, int ninst, int s1, int s2, int st);
 
diff --git a/src/dynarec/la64/la64_emitter.h b/src/dynarec/la64/la64_emitter.h
index 9951f070..29713ca6 100644
--- a/src/dynarec/la64/la64_emitter.h
+++ b/src/dynarec/la64/la64_emitter.h
@@ -1246,6 +1246,12 @@ LSX instruction starts with V, LASX instruction starts with XV.
 #define VFMAXA_D(vd, vj, vk)        EMIT(type_3R(0b01110001010000010, vk, vj, vd))
 #define VFMINA_S(vd, vj, vk)        EMIT(type_3R(0b01110001010000101, vk, vj, vd))
 #define VFMINA_D(vd, vj, vk)        EMIT(type_3R(0b01110001010000110, vk, vj, vd))
+#define VFSQRT_S(vd, vj)            EMIT(type_2R(0b0111001010011100111001, vj, vd))
+#define VFSQRT_D(vd, vj)            EMIT(type_2R(0b0111001010011100111010, vj, vd))
+#define VFRECIP_S(vd, vj)           EMIT(type_2R(0b0111001010011100111101, vj, vd))
+#define VFRECIP_D(vd, vj)           EMIT(type_2R(0b0111001010011100111110, vj, vd))
+#define VFRSQRT_S(vd, vj)           EMIT(type_2R(0b0111001010011101000001, vj, vd))
+#define VFRSQRT_D(vd, vj)           EMIT(type_2R(0b0111001010011101000010, vj, vd))
 #define VFCVTL_S_H(vd, vj)          EMIT(type_2R(0b0111001010011101111010, vj, vd))
 #define VFCVTH_S_H(vd, vj)          EMIT(type_2R(0b0111001010011101111011, vj, vd))
 #define VFCVTL_D_S(vd, vj)          EMIT(type_2R(0b0111001010011101111100, vj, vd))