diff options
Diffstat (limited to 'src')
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_0f.c | 78 | ||||
| -rw-r--r-- | src/dynarec/la64/dynarec_la64_660f.c | 88 |
2 files changed, 166 insertions, 0 deletions
diff --git a/src/dynarec/la64/dynarec_la64_0f.c b/src/dynarec/la64/dynarec_la64_0f.c index 110b0252..0f2290cb 100644 --- a/src/dynarec/la64/dynarec_la64_0f.c +++ b/src/dynarec/la64/dynarec_la64_0f.c @@ -283,6 +283,17 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni SMWRITE2(); } break; + case 0x2A: + INST_NAME("CVTPI2PS Gx,Em"); + nextop = F8; + GETGX(v0, 1); + GETEM(v1, 0); + q0 = fpu_get_scratch(dyn); + u8 = sse_setround(dyn, ninst, x1, x2); + VFFINT_S_W(q0, v1); + x87_restoreround(dyn, ninst, u8); + VEXTRINS_D(v0, q0, VEXTRINS_IMM_4_0(0, 0)); + break; case 0x2B: INST_NAME("MOVNTPS Ex,Gx"); nextop = F8; @@ -297,6 +308,73 @@ uintptr_t dynarec64_0F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int ni VST(v0, ed, fixedaddress); } break; + case 0x2C: + INST_NAME("CVTTPS2PI Gm,Ex"); + nextop = F8; + GETGM(v0); + GETEX(v1, 0, 0); + if (BOX64ENV(dynarec_fastround)) { + VFTINTRZ_W_S(v0, v1); + } else { + MOVGR2FCSR(FCSR2, xZR); // reset all bits + VFTINTRZ_W_S(v0, v1); + MOVFCSR2GR(x5, FCSR2); // get back FPSR to check + MOV32w(x3, (1 << FR_V) | (1 << FR_O)); + AND(x5, x5, x3); + BEQZ_MARK3(x5); // no fp exception, work done. + + // check +/-NaN, +overflow, replace with 0x80000000 + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); // mask + d0 = fpu_get_scratch(dyn); + VLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all + VLDI(d0, (0b10011 << 8) | 0x4f); + VFCMP_S(q1, d0, v1, cULE); // get NaN/+overflow mark + VBITSEL_V(v0, v0, q0, q1); + + MARK3; + } + break; + case 0x2D: + INST_NAME("CVTPS2PI Gm, Ex"); + nextop = F8; + GETGM(v0); + GETEX(v1, 0, 0); + u8 = sse_setround(dyn, ninst, x4, x6); + if (BOX64ENV(dynarec_fastround)) { + VFTINTRZ_W_S(v0, v1); + } else { + MOVGR2FCSR(FCSR2, xZR); // reset all bits + VFTINT_W_S(v0, v1); + MOVFCSR2GR(x5, FCSR2); // get back FPSR to check + MOV32w(x3, (1 << FR_V) | (1 << FR_O)); + AND(x5, x5, x3); + BEQZ_MARK3(x5); // no fp exception, work done, fast path.
+ + // check +/-NaN, +overflow, replace with 0x80000000 + /* LoongArch follows IEEE754-2008, + if val < -2147483648.0f got -2147483648 match sse + if val > 2147483648.0f got 2147483647 need mask + but luckily _Float32 is not accurate: + -2147483648.0f is 0xcf000000 (_Float32) + -2147483520.0f is 0xceffffff (_Float32) + 2147483648.0f is 0x4f000000 (_Float32) + 2147483520.0f is 0x4effffff (_Float32) + combine (unordered || gt 0x4f000000) + use cULE for (unordered || 0x4f000000 <= v1[x]) + */ + q0 = fpu_get_scratch(dyn); + q1 = fpu_get_scratch(dyn); // mask + d0 = fpu_get_scratch(dyn); + VLDI(q0, 0b1001110000000); // broadcast 0x80000000 to all + VLDI(d0, (0b10011 << 8) | 0x4f); + VFCMP_S(q1, d0, v1, cULE); // get NaN/+overflow mark + VBITSEL_V(v0, v0, q0, q1); + + MARK3; + } + x87_restoreround(dyn, ninst, u8); + break; case 0x2E: // no special check... case 0x2F: diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index fc375fd7..c0619ff8 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -208,6 +208,14 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int SMWRITE2(); } break; + case 0x2A: + INST_NAME("CVTPI2PD Gx,Em"); + nextop = F8; + GETGX(v0, 1); + GETEM(v1, 0); + q0 = fpu_get_scratch(dyn); + VFFINTL_D_W(v0, v1); + break; case 0x2B: INST_NAME("MOVNTPD Ex,Gx"); nextop = F8; @@ -222,6 +230,86 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VST(v0, ed, fixedaddress); } break; + case 0x2C: + INST_NAME("CVTTPD2PI Gm,Ex"); + nextop = F8; + GETGM(v0); + GETEX(v1, 0, 0); + if (BOX64ENV(dynarec_fastround)) { + VFTINTRZ_W_D(v0, v1, v1); + } else { + MOVGR2FCSR(FCSR2, xZR); // reset all bits + VFTINTRZ_W_D(v0, v1, v1); + MOVFCSR2GR(x5, FCSR2); // get back FPSR to check + MOV32w(x3, (1 << FR_V) | (1 << FR_O)); + AND(x5, x5, x3); + BEQZ_MARK3(x5); // no fp exception, work done.
+ + q0 = fpu_get_scratch(dyn); + MOVGR2FCSR(FCSR2, xZR); // reset all bits + FTINTRZ_W_D(v0, v1); + MOVFCSR2GR(x5, FCSR2); // get back FPSR to check + AND(x5, x5, x3); + BEQZ_MARK(x5); + MOV32w(x1, 0x80000000); + MOVGR2FR_W(v0, x1); + MARK; + + MOVGR2FCSR(FCSR2, xZR); // reset all bits + VSHUF4I_W(q0, v1, 0b1110); // get v1 high 64bits + FTINTRZ_W_D(q0, q0); + MOVFCSR2GR(x5, FCSR2); // get back FPSR to check + AND(x5, x5, x3); + BEQZ_MARK2(x5); + MOV32w(x1, 0x80000000); + MOVGR2FRH_W(v0, x1); + B_MARK3_nocond; + MARK2; + VEXTRINS_W(v0, q0, VEXTRINS_IMM_4_0(1, 0)); + MARK3; + } + break; + case 0x2D: + INST_NAME("CVTPD2PI Gm,Ex"); + nextop = F8; + GETGM(v0); + GETEX(v1, 0, 0); + u8 = sse_setround(dyn, ninst, x4, x6); + if (BOX64ENV(dynarec_fastround)) { + VFTINT_W_D(v0, v1, v1); + } else { + MOVGR2FCSR(FCSR2, xZR); // reset all bits + VFTINT_W_D(v0, v1, v1); + MOVFCSR2GR(x5, FCSR2); // get back FPSR to check + MOV32w(x3, (1 << FR_V) | (1 << FR_O)); + AND(x5, x5, x3); + BEQZ_MARK3(x5); // no fp exception, work done. + + q0 = fpu_get_scratch(dyn); + MOVGR2FCSR(FCSR2, xZR); // reset all bits + FTINT_W_D(v0, v1); + MOVFCSR2GR(x5, FCSR2); // get back FPSR to check + AND(x5, x5, x3); + BEQZ_MARK(x5); + MOV32w(x1, 0x80000000); + MOVGR2FR_W(v0, x1); + MARK; + + MOVGR2FCSR(FCSR2, xZR); // reset all bits + VSHUF4I_W(q0, v1, 0b1110); // get v1 high 64bits + FTINT_W_D(q0, q0); + MOVFCSR2GR(x5, FCSR2); // get back FPSR to check + AND(x5, x5, x3); + BEQZ_MARK2(x5); + MOV32w(x1, 0x80000000); + MOVGR2FRH_W(v0, x1); + B_MARK3_nocond; + MARK2; + VEXTRINS_W(v0, q0, VEXTRINS_IMM_4_0(1, 0)); + MARK3; + } + x87_restoreround(dyn, ninst, u8); + break; case 0x2E: // no special check... case 0x2F: |