Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/la64/dynarec_la64_660f.c       | 52
-rw-r--r--  src/dynarec/la64/dynarec_la64_emit_math.c  |  2
-rw-r--r--  src/dynarec/la64/dynarec_la64_f30f.c       | 34
-rw-r--r--  src/dynarec/la64/dynarec_la64_helper.c     |  8
4 files changed, 90 insertions, 6 deletions
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index 02354595..9404ae20 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -327,6 +327,54 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                         VOR_V(q0, q0, d0);
                     }
                     break;
+                case 0x16:
+                    if (rex.w) {
+                        INST_NAME("PEXTRQ Ed, Gx, Ib");
+                    } else {
+                        INST_NAME("PEXTRD Ed, Gx, Ib");
+                    }
+                    nextop = F8;
+                    GETGX(q0, 0);
+                    d0 = fpu_get_scratch(dyn);
+                    if (MODREG) {
+                        ed = TO_LA64((nextop & 7) + (rex.b << 3));
+                        u8 = F8;
+                        if (rex.w) {
+                            VBSRL_V(d0, q0, (u8 & 1) * 8);
+                            MOVFR2GR_D(ed, d0);
+                        } else {
+                            VBSRL_V(d0, q0, (u8 & 3) * 4);
+                            MOVFR2GR_S(ed, d0);
+                            ZEROUP(ed);
+                        }
+                    } else {
+                        addr = geted(dyn, addr, ninst, nextop, &ed, x3, x5, &fixedaddress, rex, NULL, 1, 1);
+                        u8 = F8;
+                        if (rex.w) {
+                            VBSRL_V(d0, q0, (u8 & 1) * 8);
+                            FST_D(d0, ed, fixedaddress);
+                        } else {
+                            VBSRL_V(d0, q0, (u8 & 3) * 4);
+                            FST_S(d0, ed, fixedaddress);
+                        }
+                        SMWRITE2();
+                    }
+                    break;
+                case 0x22:
+                    INST_NAME("PINSRD Gx, ED, Ib");
+                    nextop = F8;
+                    GETGX(q0, 1);
+                    GETED(1);
+                    u8 = F8;
+                    d0 = fpu_get_scratch(dyn);
+                    if (rex.w) {
+                        MOVGR2FR_D(d0, ed);
+                        VEXTRINS_D(q0, d0, (u8 & 1) << 4);
+                    } else {
+                        MOVGR2FR_W(d0, ed);
+                        VEXTRINS_W(q0, d0, (u8 & 3) << 4);
+                    }
+                    break;
                 case 0x44:
                     INST_NAME("PCLMULQDQ Gx, Ex, Ib");
                     nextop = F8;
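
For reference, the x86 semantics behind the new 0x16 and 0x22 cases: PEXTRD/PEXTRQ copy the dword or qword lane of Gx selected by the immediate into a register or memory destination (the 32-bit register form is zero-extended, hence the ZEROUP), while PINSRD/PINSRQ overwrite the selected lane of Gx with the low bits of Ed. That lane selection is why the emitted code masks the immediate with 1 (qword lanes) or 3 (dword lanes) before the VBSRL_V byte shift or VEXTRINS insert. A minimal C sketch of that behaviour; the xmm_t type and function names below are illustrative, not box64 APIs:

    /* Lane extract/insert as done by PEXTRD/PEXTRQ and PINSRD/PINSRQ.
       Illustrative reference model only. */
    #include <stdint.h>
    #include <string.h>

    typedef struct { uint8_t b[16]; } xmm_t; /* one 128-bit XMM register */

    /* PEXTRD/PEXTRQ: read lane (imm & 3) as a dword, or lane (imm & 1) as a qword. */
    static uint64_t pextr(const xmm_t* gx, uint8_t imm, int is64)
    {
        uint64_t r = 0;
        if (is64)
            memcpy(&r, &gx->b[(imm & 1) * 8], 8);
        else
            memcpy(&r, &gx->b[(imm & 3) * 4], 4); /* 32-bit result is zero-extended */
        return r;
    }

    /* PINSRD/PINSRQ: overwrite the selected lane with the low bits of ed. */
    static void pinsr(xmm_t* gx, uint64_t ed, uint8_t imm, int is64)
    {
        if (is64)
            memcpy(&gx->b[(imm & 1) * 8], &ed, 8);
        else
            memcpy(&gx->b[(imm & 3) * 4], &ed, 4);
    }
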
@@ -495,6 +543,10 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
             GETEX(v1, 0, 0);
             GETGX_empty(v0);
             u8 = sse_setround(dyn, ninst, x6, x4);
+            if (v0 == v1 && !box64_dynarec_fastround) {
+                v1 = fpu_get_scratch(dyn);
+                VOR_V(v1, v0, v0);
+            }
             VFTINT_W_S(v0, v1);
             if (!box64_dynarec_fastround) {
                 q0 = fpu_get_scratch(dyn);
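
The four added lines in the hunk above fix an aliasing case in the CVTPS2DQ (66 0F 5B) path: when the destination picked by GETGX_empty is the same LSX register as the source from GETEX (Gx == Ex in the x86 instruction), VFTINT_W_S would clobber the input before the non-fastround path can inspect it for NaN/overflow, so the input is first duplicated into a scratch with VOR_V (a register-to-register vector move). The same save-the-input-before-overwriting pattern in plain C (an illustrative sketch only; the function name is an assumption and the truncating cast stands in for MXCSR-directed rounding):

    /* Convert 4 packed floats to int32 while keeping the input available for
       the NaN/overflow fixups, even when dst and src are the same buffer. */
    #include <stdint.h>
    #include <string.h>

    static void cvtps2dq_checked(int32_t dst[4], const float src[4])
    {
        float tmp[4];
        const float* in = src;
        if ((const void*)dst == (const void*)src) { /* aliased: writing dst would clobber src */
            memcpy(tmp, src, sizeof(tmp));          /* keep a private copy of the input */
            in = tmp;
        }
        for (int i = 0; i < 4; ++i) {
            if (in[i] != in[i] || in[i] >= 2147483648.0f || in[i] < -2147483648.0f)
                dst[i] = (int32_t)0x80000000;       /* x86 "integer indefinite" result */
            else
                dst[i] = (int32_t)in[i];            /* cast stands in for the rounded conversion */
        }
    }
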
diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c
index 4abc9603..fcf3230f 100644
--- a/src/dynarec/la64/dynarec_la64_emit_math.c
+++ b/src/dynarec/la64/dynarec_la64_emit_math.c
@@ -1558,7 +1558,7 @@ void emit_neg16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3)
         MV(s3, s1); // s3 = op1
     }
 
-    NOR(s1, s1, xZR);
+    NEG_D(s1, s1);
     BSTRPICK_D(s1, s1, 15, 0);
     IFX (X_PEND) {
         ST_H(s1, xEmu, offsetof(x64emu_t, res));
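
The one-line change above is the actual neg16 fix: NOR(s1, s1, xZR) is only a bitwise NOT, which in two's complement gives -x - 1, whereas NEG is ~x + 1 (equivalently 0 - x). A quick C check of the difference (illustrative helper names):

    /* Bitwise NOT vs. two's-complement negation on 16-bit values. */
    #include <assert.h>
    #include <stdint.h>

    static uint16_t neg16_old(uint16_t x) { return (uint16_t)~x; }      /* NOT only: equals -x - 1 */
    static uint16_t neg16_new(uint16_t x) { return (uint16_t)(0 - x); } /* proper NEG: ~x + 1 */

    int main(void)
    {
        for (uint32_t x = 0; x <= 0xFFFF; ++x)
            assert(neg16_new((uint16_t)x) == (uint16_t)(neg16_old((uint16_t)x) + 1));
        assert(neg16_new(1) == 0xFFFF);  /* -1 as an unsigned 16-bit value */
        assert(neg16_new(0) == 0);
        return 0;
    }
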
diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c
index 8aa54ecf..5f116b2d 100644
--- a/src/dynarec/la64/dynarec_la64_f30f.c
+++ b/src/dynarec/la64/dynarec_la64_f30f.c
@@ -116,7 +116,39 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 MOVFR2GR_S(gd, d1);
                 ZEROUP(gd);
             }
-            if (!rex.w) ZEROUP(gd);
+            if (!box64_dynarec_fastround) {
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                CBZ_NEXT(x5);
+                if (rex.w) {
+                    MOV64x(gd, 0x8000000000000000LL);
+                } else {
+                    MOV32w(gd, 0x80000000);
+                }
+            }
+            break;
+        case 0x2D:
+            INST_NAME("CVTSS2SI Gd, Ex");
+            nextop = F8;
+            GETGD;
+            GETEXSS(d0, 0, 0);
+            if (!box64_dynarec_fastround) {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+            }
+            u8 = sse_setround(dyn, ninst, x5, x6);
+            d1 = fpu_get_scratch(dyn);
+            if (rex.w) {
+                FTINT_L_S(d1, d0);
+                MOVFR2GR_D(gd, d1);
+            } else {
+                FTINT_W_S(d1, d0);
+                MOVFR2GR_S(gd, d1);
+                ZEROUP(gd);
+            }
+            x87_restoreround(dyn, ninst, u8);
             if (!box64_dynarec_fastround) {
                 MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
                 MOV32w(x3, (1 << FR_V) | (1 << FR_O));
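
The hunk above drops a redundant ZEROUP from the preceding truncating CVTTSS2SI (F3 0F 2C) case, gives it the non-fastround fixup, and adds the rounding variant CVTSS2SI (0x2D), which converts under the current MXCSR rounding mode via sse_setround/x87_restoreround instead of truncating. In both cases, when box64_dynarec_fastround is off, the FCSR invalid/overflow flags are checked after the FTINT and the x86 "integer indefinite" value (0x80000000 or 0x8000000000000000) is substituted. A rough C model of that result contract (illustrative only; the function name is an assumption and lrintf/llrintf stand in for FTINT under the current rounding mode):

    /* Result contract enforced by the !fastround path: NaN or out-of-range
       inputs yield the x86 "integer indefinite" value. Illustrative only. */
    #include <math.h>
    #include <stdint.h>

    static int64_t cvtss2si_model(float x, int is64)
    {
        if (is64) {
            if (isnan(x) || x >= 9223372036854775808.0f || x < -9223372036854775808.0f)
                return (int64_t)0x8000000000000000ULL;      /* 64-bit indefinite */
            return llrintf(x);                              /* rounds per current FP mode */
        }
        if (isnan(x) || x >= 2147483648.0f || x < -2147483648.0f)
            return (int64_t)(uint32_t)0x80000000u;          /* 32-bit indefinite, zero-extended */
        return (int64_t)(uint32_t)(int32_t)lrintf(x);       /* 32-bit convert, zero-extend like ZEROUP */
    }
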
diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c
index d3954e63..b5b721fe 100644
--- a/src/dynarec/la64/dynarec_la64_helper.c
+++ b/src/dynarec/la64/dynarec_la64_helper.c
@@ -739,18 +739,18 @@ int sse_setround(dynarec_la64_t* dyn, int ninst, int s1, int s2)
     // MMX/x87 Round mode: 0..3: Nearest, Down, Up, Chop
     // LA64: 0..3: Nearest, TowardZero, TowardsPositive, TowardsNegative
     // 0->0, 1->3, 2->2, 3->1
-    BEQ(s1, xZR, 32);
+    BEQ(s1, xZR, 4 + 4 * 8); // done + 4
     ADDI_D(s2, xZR, 2);
-    BEQ(s1, s2, 24);
+    BEQ(s1, s2, 4 + 4 * 5); // done
     ADDI_D(s2, xZR, 3);
-    BEQ(s1, s2, 12);
+    BEQ(s1, s2, 4 + 4 * 2);
     ADDI_D(s1, xZR, 3);
     B(8);
     ADDI_D(s1, xZR, 1);
     // done
     SLLI_D(s1, s1, 8);
     MOVFCSR2GR(s2, FCSR3);
-    MOVGR2FCSR(FCSR3, s1); // exange RM with current
+    MOVGR2FCSR(FCSR3, s1); // exchange RM with current
     return s2;
 }
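
The retouched branch ladder in sse_setround encodes the permutation spelled out in the comment: x86 MXCSR rounding codes 0..3 (Nearest, Down, Up, Chop) become LoongArch codes 0, 3, 2, 1, and the result is shifted into bits 9:8 before being written to FCSR3, with the previous FCSR3 value returned in s2 so the caller can restore it. The change itself only rewrites the hard-coded branch offsets as 4 + 4*n byte expressions (the first branch now lands one instruction past "done", which is harmless since s1 is already zero on that path). The same mapping written out in C (illustrative, not the emitted code):

    /* x86 MXCSR RC -> LoongArch FCSR RM, as selected by the branch ladder.
       x86:  0 = Nearest, 1 = Down, 2 = Up, 3 = Chop (toward zero)
       LA64: 0 = Nearest, 1 = Toward zero, 2 = Toward +inf, 3 = Toward -inf */
    #include <stdint.h>

    static uint32_t x86_rc_to_la64_rm(uint32_t rc)
    {
        static const uint32_t map[4] = { 0, 3, 2, 1 };
        return map[rc & 3];
    }

    /* sse_setround then installs the value at bits 9:8 (SLLI_D(s1, s1, 8))
       and hands back the old FCSR3 in s2 for x87_restoreround. */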