diff options
Diffstat (limited to 'src')
-rw-r--r--  src/dynarec/la64/dynarec_la64_660f.c      | 52
-rw-r--r--  src/dynarec/la64/dynarec_la64_emit_math.c |  2
-rw-r--r--  src/dynarec/la64/dynarec_la64_f30f.c      | 34
-rw-r--r--  src/dynarec/la64/dynarec_la64_helper.c    |  8
4 files changed, 90 insertions, 6 deletions
diff --git a/src/dynarec/la64/dynarec_la64_660f.c b/src/dynarec/la64/dynarec_la64_660f.c index 02354595..9404ae20 100644 --- a/src/dynarec/la64/dynarec_la64_660f.c +++ b/src/dynarec/la64/dynarec_la64_660f.c @@ -327,6 +327,54 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int VOR_V(q0, q0, d0); } break; + case 0x16: + if (rex.w) { + INST_NAME("PEXTRQ Ed, Gx, Ib"); + } else { + INST_NAME("PEXTRD Ed, Gx, Ib"); + } + nextop = F8; + GETGX(q0, 0); + d0 = fpu_get_scratch(dyn); + if (MODREG) { + ed = TO_LA64((nextop & 7) + (rex.b << 3)); + u8 = F8; + if (rex.w) { + VBSRL_V(d0, q0, (u8 & 1) * 8); + MOVFR2GR_D(ed, d0); + } else { + VBSRL_V(d0, q0, (u8 & 3) * 4); + MOVFR2GR_S(ed, d0); + ZEROUP(ed); + } + } else { + addr = geted(dyn, addr, ninst, nextop, &ed, x3, x5, &fixedaddress, rex, NULL, 1, 1); + u8 = F8; + if (rex.w) { + VBSRL_V(d0, q0, (u8 & 1) * 8); + FST_D(d0, ed, fixedaddress); + } else { + VBSRL_V(d0, q0, (u8 & 3) * 4); + FST_S(d0, ed, fixedaddress); + } + SMWRITE2(); + } + break; + case 0x22: + INST_NAME("PINSRD Gx, ED, Ib"); + nextop = F8; + GETGX(q0, 1); + GETED(1); + u8 = F8; + d0 = fpu_get_scratch(dyn); + if (rex.w) { + MOVGR2FR_D(d0, ed); + VEXTRINS_D(q0, d0, (u8 & 1) << 4); + } else { + MOVGR2FR_W(d0, ed); + VEXTRINS_W(q0, d0, (u8 & 3) << 4); + } + break; case 0x44: INST_NAME("PCLMULQDQ Gx, Ex, Ib"); nextop = F8; @@ -495,6 +543,10 @@ uintptr_t dynarec64_660F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int GETEX(v1, 0, 0); GETGX_empty(v0); u8 = sse_setround(dyn, ninst, x6, x4); + if (v0 == v1 && !box64_dynarec_fastround) { + v1 = fpu_get_scratch(dyn); + VOR_V(v1, v0, v0); + } VFTINT_W_S(v0, v1); if (!box64_dynarec_fastround) { q0 = fpu_get_scratch(dyn); diff --git a/src/dynarec/la64/dynarec_la64_emit_math.c b/src/dynarec/la64/dynarec_la64_emit_math.c index 4abc9603..fcf3230f 100644 --- a/src/dynarec/la64/dynarec_la64_emit_math.c +++ b/src/dynarec/la64/dynarec_la64_emit_math.c @@ -1558,7 +1558,7 @@ void 
emit_neg16(dynarec_la64_t* dyn, int ninst, int s1, int s2, int s3) MV(s3, s1); // s3 = op1 } - NOR(s1, s1, xZR); + NEG_D(s1, s1); BSTRPICK_D(s1, s1, 15, 0); IFX (X_PEND) { ST_H(s1, xEmu, offsetof(x64emu_t, res)); } diff --git a/src/dynarec/la64/dynarec_la64_f30f.c b/src/dynarec/la64/dynarec_la64_f30f.c index 8aa54ecf..5f116b2d 100644 --- a/src/dynarec/la64/dynarec_la64_f30f.c +++ b/src/dynarec/la64/dynarec_la64_f30f.c @@ -116,7 +116,37 @@ uintptr_t dynarec64_F30F(dynarec_la64_t* dyn, uintptr_t addr, uintptr_t ip, int MOVFR2GR_S(gd, d1); ZEROUP(gd); } - if (!rex.w) ZEROUP(gd); + if (!box64_dynarec_fastround) { + MOVFCSR2GR(x5, FCSR2); // get back FPSR to check + MOV32w(x3, (1 << FR_V) | (1 << FR_O)); + AND(x5, x5, x3); + CBZ_NEXT(x5); + if (rex.w) { + MOV64x(gd, 0x8000000000000000LL); + } else { + MOV32w(gd, 0x80000000); + } + } + break; + case 0x2D: + INST_NAME("CVTSS2SI Gd, Ex"); + nextop = F8; + GETGD; + GETEXSS(d0, 0, 0); + if (!box64_dynarec_fastround) { + MOVGR2FCSR(FCSR2, xZR); // reset all bits + } + u8 = sse_setround(dyn, ninst, x5, x6); + d1 = fpu_get_scratch(dyn); + if (rex.w) { + FTINT_L_S(d1, d0); + MOVFR2GR_D(gd, d1); + } else { + FTINT_W_S(d1, d0); + MOVFR2GR_S(gd, d1); + ZEROUP(gd); + } + x87_restoreround(dyn, ninst, u8); if (!box64_dynarec_fastround) { MOVFCSR2GR(x5, FCSR2); // get back FPSR to check MOV32w(x3, (1 << FR_V) | (1 << FR_O)); diff --git a/src/dynarec/la64/dynarec_la64_helper.c b/src/dynarec/la64/dynarec_la64_helper.c index d3954e63..b5b721fe 100644 --- a/src/dynarec/la64/dynarec_la64_helper.c +++ b/src/dynarec/la64/dynarec_la64_helper.c @@ -739,18 +739,18 @@ int sse_setround(dynarec_la64_t* dyn, int ninst, int s1, int s2) // MMX/x87 Round mode: 0..3: Nearest, Down, Up, Chop // LA64: 0..3: Nearest, TowardZero, TowardsPositive, TowardsNegative // 0->0, 1->3, 2->2, 3->1 - BEQ(s1, xZR, 32); + BEQ(s1, xZR, 4 + 4 * 8); // done + 4 ADDI_D(s2, xZR, 2); - BEQ(s1, s2, 24); + BEQ(s1, s2, 
4 + 4 * 5); // done ADDI_D(s2, xZR, 3); - BEQ(s1, s2, 12); + BEQ(s1, s2, 4 + 4 * 2); ADDI_D(s1, xZR, 3); B(8); ADDI_D(s1, xZR, 1); // done SLLI_D(s1, s1, 8); MOVFCSR2GR(s2, FCSR3); - MOVGR2FCSR(FCSR3, s1); // exange RM with current + MOVGR2FCSR(FCSR3, s1); // exchange RM with current return s2; } |