Diffstat (limited to 'src')
 -rw-r--r--  src/dynarec/la64/dynarec_la64_0f.c   | 78
 -rw-r--r--  src/dynarec/la64/dynarec_la64_660f.c | 88
 2 files changed, 166 insertions(+), 0 deletions(-)
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c
index 110b0252..0f2290cb 100644
--- a/src/dynarec/la64/dynarec_la64_0f.c
+++ b/src/dynarec/la64/dynarec_la64_0f.c
@@ -283,6 +283,17 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 SMWRITE2();
             }
             break;
+        case 0x2A:
+            INST_NAME("CVTPI2PS Gx,Em");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEM(v1, 0);
+            q0 = fpu_get_scratch(dyn);
+            u8 = sse_setround(dyn, ninst, x1, x2);
+            VFFINT_S_W(q0, v1); // convert packed int32 to float (only the 2 low lanes are used)
+            x87_restoreround(dyn, ninst, u8);
+            VEXTRINS_D(v0, q0, VEXTRINS_IMM_4_0(0, 0)); // insert the 2 converted floats into the low 64 bits of Gx
+            break;
         case 0x2B:
             INST_NAME("MOVNTPS Ex,Gx");
             nextop = F8;
@@ -297,6 +308,73 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni
                 VST(v0, ed, fixedaddress);
             }
             break;
+        case 0x2C:
+            INST_NAME("CVTTPS2PI Gm,Ex");
+            nextop = F8;
+            GETGM(v0);
+            GETEX(v1, 0, 0);
+            if (BOX64ENV(dynarec_fastround)) {
+                VFTINTRZ_W_S(v0, v1);
+            } else {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+                VFTINTRZ_W_S(v0, v1);
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                BEQZ_MARK3(x5); // no fp exception, work done.
+
+                // check +/-NaN and positive overflow, replace those lanes with 0x80000000
+                q0 = fpu_get_scratch(dyn);
+                q1 = fpu_get_scratch(dyn); // mask
+                d0 = fpu_get_scratch(dyn);
+                VLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
+                VLDI(d0, (0b10011 << 8) | 0x4f); // broadcast 0x4f000000 (2147483648.0f) to all
+                VFCMP_S(q1, d0, v1, cULE); // mask lanes that are NaN or >= 2147483648.0f
+                VBITSEL_V(v0, v0, q0, q1);
+
+                MARK3;
+            }
+            break;
+        case 0x2D:
+            INST_NAME("CVTPS2PI Gm, Ex");
+            nextop = F8;
+            GETGM(v0);
+            GETEX(v1, 0, 0);
+            u8 = sse_setround(dyn, ninst, x4, x6);
+            if (BOX64ENV(dynarec_fastround)) {
+                VFTINT_W_S(v0, v1); // rounding mode was set by sse_setround, so no truncating variant here
+            } else {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+                VFTINT_W_S(v0, v1);
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                BEQZ_MARK3(x5); // no fp exception, work done.
+
+                // check +/-NaN and positive overflow, replace those lanes with 0x80000000
+                /* LoongArch follows IEEE 754-2008:
+                   if val <  -2147483648.0f we get -2147483648, which matches SSE;
+                   if val >=  2147483648.0f we get  2147483647, which needs masking.
+                   Luckily _Float32 has no value between 2147483520.0f and 2147483648.0f:
+                   -2147483648.0f is 0xcf000000 (_Float32)
+                   -2147483520.0f is 0xceffffff (_Float32)
+                    2147483648.0f is 0x4f000000 (_Float32)
+                    2147483520.0f is 0x4effffff (_Float32)
+                   so (unordered || v1[x] >= 2147483648.0f) catches every bad lane,
+                   which is exactly cULE for (unordered || 0x4f000000 <= v1[x]).
+                */
+                q0 = fpu_get_scratch(dyn);
+                q1 = fpu_get_scratch(dyn); // mask
+                d0 = fpu_get_scratch(dyn);
+                VLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all
+                VLDI(d0, (0b10011 << 8) | 0x4f); // broadcast 0x4f000000 (2147483648.0f) to all
+                VFCMP_S(q1, d0, v1, cULE); // mask lanes that are NaN or >= 2147483648.0f
+                VBITSEL_V(v0, v0, q0, q1);
+
+                MARK3;
+            }
+            x87_restoreround(dyn, ninst, u8);
+            break;
         case 0x2E:
             // no special check...
         case 0x2F:
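
Note: the _Float32 boundary argument in the CVTPS2PI comment above can be sanity-checked with a small standalone program (illustrative only, not part of this patch; names are made up). Near 2^31 the single-precision ulp is 128, so 0x4f000000 (2147483648.0f) is the smallest float that overflows int32_t, and "unordered or >= 2147483648.0f" is exactly what the cULE compare computes.

// check_cvt_boundary.c - illustrative only, not part of this patch.
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

static uint32_t bits(float f) { uint32_t u; memcpy(&u, &f, sizeof u); return u; }

int main(void)
{
    float hi    = 2147483648.0f;          // 2^31, does not fit in int32_t
    float below = nextafterf(hi, 0.0f);   // largest float below 2^31

    printf("%08x %.1f\n", bits(hi), hi);        // 4f000000 2147483648.0
    printf("%08x %.1f\n", bits(below), below);  // 4effffff 2147483520.0

    // NaN or >= 2^31  <=>  !(x < 2^31), i.e. the cULE(2^31, x) condition.
    float tests[] = { NAN, hi, below, -2147483648.0f };
    for (int i = 0; i < 4; i++) {
        float x = tests[i];
        printf("x=%-14.1f needs 0x80000000: %d\n", x, !(x < hi));
    }
    return 0;
}

For -2147483648.0f the answer is 0: negative overflow already saturates to 0x80000000 on LoongArch, which matches SSE, so only NaN and positive overflow need the mask.
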
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c
index fc375fd7..c0619ff8 100644
--- a/src/dynarec/la64/dynarec_la64_660f.c
+++ b/src/dynarec/la64/dynarec_la64_660f.c
@@ -208,6 +208,14 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 SMWRITE2();
             }
             break;
+        case 0x2A:
+            INST_NAME("CVTPI2PD Gx,Em");
+            nextop = F8;
+            GETGX(v0, 1);
+            GETEM(v1, 0);
+            q0 = fpu_get_scratch(dyn);
+            VFFINTL_D_W(v0, v1); // convert the 2 low int32 of Em to 2 doubles
+            break;
         case 0x2B:
             INST_NAME("MOVNTPD Ex,Gx");
             nextop = F8;
@@ -222,6 +230,86 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int
                 VST(v0, ed, fixedaddress);
             }
             break;
+        case 0x2C:
+            INST_NAME("CVTTPD2PI Gm,Ex");
+            nextop = F8;
+            GETGM(v0);
+            GETEX(v1, 0, 0);
+            if (BOX64ENV(dynarec_fastround)) {
+                VFTINTRZ_W_D(v0, v1, v1);
+            } else {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+                VFTINTRZ_W_D(v0, v1, v1);
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                BEQZ_MARK3(x5); // no fp exception, work done.
+
+                q0 = fpu_get_scratch(dyn);
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+                FTINTRZ_W_D(v0, v1);
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                AND(x5, x5, x3);
+                BEQZ_MARK(x5);
+                MOV32w(x1, 0x80000000);
+                MOVGR2FR_W(v0, x1);
+                MARK;
+
+                MOVGR2FCSR(FCSR2, xZR);    // reset all bits
+                VSHUF4I_W(q0, v1, 0b1110); // get v1 high 64bits
+                FTINTRZ_W_D(q0, q0);
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                AND(x5, x5, x3);
+                BEQZ_MARK2(x5);
+                MOV32w(x1, 0x80000000);
+                MOVGR2FRH_W(v0, x1);
+                B_MARK3_nocond;
+                MARK2;
+                VEXTRINS_W(v0, q0, VEXTRINS_IMM_4_0(1, 0)); // insert the converted high lane into v0[1]
+                MARK3;
+            }
+            break;
+        case 0x2D:
+            INST_NAME("CVTPD2PI Gm,Ex");
+            nextop = F8;
+            GETGM(v0);
+            GETEX(v1, 0, 0);
+            u8 = sse_setround(dyn, ninst, x4, x6);
+            if (BOX64ENV(dynarec_fastround)) {
+                VFTINT_W_D(v0, v1, v1);
+            } else {
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+                VFTINT_W_D(v0, v1, v1);
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                MOV32w(x3, (1 << FR_V) | (1 << FR_O));
+                AND(x5, x5, x3);
+                BEQZ_MARK3(x5); // no fp exception, work done.
+
+                q0 = fpu_get_scratch(dyn);
+                MOVGR2FCSR(FCSR2, xZR); // reset all bits
+                FTINT_W_D(v0, v1);
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                AND(x5, x5, x3);
+                BEQZ_MARK(x5);
+                MOV32w(x1, 0x80000000);
+                MOVGR2FR_W(v0, x1);
+                MARK;
+
+                MOVGR2FCSR(FCSR2, xZR);    // reset all bits
+                VSHUF4I_W(q0, v1, 0b1110); // get v1 high 64bits
+                FTINT_W_D(q0, q0);
+                MOVFCSR2GR(x5, FCSR2); // get back FPSR to check
+                AND(x5, x5, x3);
+                BEQZ_MARK2(x5);
+                MOV32w(x1, 0x80000000);
+                MOVGR2FRH_W(v0, x1);
+                B_MARK3_nocond;
+                MARK2;
+                VEXTRINS_W(v0, q0, VEXTRINS_IMM_4_0(1, 0)); // insert the converted high lane into v0[1]
+                MARK3;
+            }
+            x87_restoreround(dyn, ninst, u8);
+            break;
         case 0x2E:
             // no special check...
         case 0x2F:
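
For context, the non-fastround paths above reproduce the x86 rule that CVT(T)PS2PI and CVT(T)PD2PI return the integer indefinite value 0x80000000 for NaN and out-of-range inputs; the dynarec detects those lanes through the V/O flags in FCSR2 and patches them afterwards. A scalar reference of the target semantics for the truncating variants might look like this (illustrative sketch, not box64 code; doubles that truncate exactly to INT32_MIN take the saturating branch here but yield the same value):

// Illustrative per-lane reference for CVTTPS2PI / CVTTPD2PI semantics.
#include <math.h>
#include <stdint.h>

static int32_t cvtt_f32_to_i32_ref(float f)   // one CVTTPS2PI lane
{
    if (isnan(f) || f >= 2147483648.0f || f < -2147483648.0f)
        return INT32_MIN;                     // x86 "integer indefinite" 0x80000000
    return (int32_t)f;                        // C casts truncate toward zero
}

static int32_t cvtt_f64_to_i32_ref(double d)  // one CVTTPD2PI lane
{
    if (isnan(d) || d >= 2147483648.0 || d < -2147483648.0)
        return INT32_MIN;
    return (int32_t)d;
}

The non-truncating CVTPS2PI/CVTPD2PI differ only in that the in-range case rounds according to MXCSR, which is why those handlers wrap the conversion in sse_setround()/x87_restoreround().
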